home *** CD-ROM | disk | FTP | other *** search
/ Enter 2007 April / ENTER_CD_04_07.iso / Internet / WinHTTrack 3.23 / httrack-3.23.exe / {app} / src / htsparse.c < prev    next >
Encoding:
C/C++ Source or Header  |  2003-03-08  |  159.9 KB  |  3,679 lines

  1. /* ------------------------------------------------------------ */
  2. /*
  3. HTTrack Website Copier, Offline Browser for Windows and Unix
  4. Copyright (C) Xavier Roche and other contributors
  5.  
  6. This program is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU General Public License
  8. as published by the Free Software Foundation; either version 2
  9. of the License, or any later version.
  10.  
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. GNU General Public License for more details.
  15.  
  16. You should have received a copy of the GNU General Public License
  17. along with this program; if not, write to the Free Software
  18. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19.  
  20.  
  21. Important notes:
  22.  
  23. - We hereby ask people using this source NOT to use it in purpose of grabbing
  24. emails addresses, or collecting any other private information on persons.
  25. This would disgrace our work, and spoil the many hours we spent on it.
  26.  
  27.  
  28. Please visit our Website: http://www.httrack.com
  29. */
  30.  
  31.  
  32. /* ------------------------------------------------------------ */
  33. /* File: htsparse.c parser                                      */
  34. /*       html/javascript/css parser                             */
  35. /*       and other parser routines                              */
  36. /* Author: Xavier Roche                                         */
  37. /* ------------------------------------------------------------ */
  38.  
  39.  
  40.  
  41. #include <stdio.h>
  42. #include <stdlib.h>
  43. #include <string.h>
  44. #include <time.h>
  45. #include <fcntl.h>
  46. #include <ctype.h>
  47.  
  48. /* File defs */
  49. #include "htscore.h"
  50.  
  51. /* specific definitions */
  52. #include "htsbase.h"
  53. #include "htsnet.h"
  54. #include "htsbauth.h"
  55. #include "htsmd5.h"
  56. #include "htsindex.h"
  57.  
  58. /* external modules */
  59. #include "htsmodules.h"
  60.  
  61. // htswrap_add
  62. #include "htswrap.h"
  63.  
  64. // parser
  65. #include "htsparse.h"
  66.  
  67.  
  68. // Shorthands for fields of the link entry currently selected by ptr (liens[ptr])
  69. #define urladr   (liens[ptr]->adr)   /* adr field of liens[ptr] */
  70. #define urlfil   (liens[ptr]->fil)   /* fil field of liens[ptr] */
  71. #define savename (liens[ptr]->sav)   /* sav field of liens[ptr] */
  72. #define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog);  } } /* when opt->flush is set, fflush() whichever of opt->log / opt->errlog is non-NULL; NOTE(review): expands to a bare if statement, so do not place it directly before an else */
  73.  
  74. // No-op placeholder statement; expanded on the out-of-memory paths of HT_ADD_CHK and HT_ADD_START before they abort
  75. #define XH_uninit do {} while(0)
  76.  
  77. // Optimised version: the rewritten html is built in memory (ht_buff) so that unmodified html files need not be touched on disk (update)
  78. #define REALLOC_SIZE 8192 /* slack added to ht_size whenever ht_buff is sized or grown */
  79. #define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { /* reserve A more bytes in ht_buff: grow when A bytes plus a terminating NUL would not fit */ \
  80.   ht_size=(A)+ht_len+REALLOC_SIZE; /* new capacity = bytes needed + slack */ \
  81.   ht_buff=(char*) realloct(ht_buff,ht_size); \
  82.   if (ht_buff==NULL) { /* out of memory: print, log through abortLogFmt, then terminate the process */ \
  83.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  84.   XH_uninit; \
  85.   abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \
  86.   exit(1); \
  87.   } \
  88.   } \
  89.   ht_len+=A; /* always executed: advance the logical length by A. NOTE(review): the expansion is an if block followed by this separate statement (and A is unparenthesized here), so it is not safe as the body of an unbraced if/else */
  90. #define HT_ADD_ADR /* append the not-yet-copied source bytes [lastsaved, adr) to ht_buff, then move lastsaved up to adr */ \
  91.   if ((opt->getmode & 1) && (ptr>0)) { /* same guard as HT_ADD and HT_ADD_START: bit 0 of opt->getmode set and ptr>0 */ \
  92.   int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) /* i = byte count, j = write offset captured before HT_ADD_CHK advances ht_len */ \
  93.   memcpy(ht_buff+j, lastsaved, i); \
  94.   ht_buff[j+i]='\0'; /* keep ht_buff NUL-terminated after every append */ \
  95.   lastsaved=adr; \
  96.   }
  97. #define HT_ADD(A) /* append the NUL-terminated string A to ht_buff; note that A is evaluated twice (strlen, memcpy) */ \
  98.   if ((opt->getmode & 1) && (ptr>0)) { /* same guard as HT_ADD_ADR and HT_ADD_START */ \
  99.   int i=strlen(A),j=ht_len; /* i = length of A, j = write offset before ht_len is advanced */ \
  100.   if (i) { /* nothing to do for an empty string */ \
  101.   HT_ADD_CHK(i) \
  102.   memcpy(ht_buff+j, A, i); \
  103.   ht_buff[j+i]='\0'; /* keep ht_buff NUL-terminated */ \
  104.   } }
  105. #define HT_ADD_START /* declares ht_size, ht_len and ht_buff in the enclosing scope and allocates the output buffer; must be expanded where declarations are allowed */ \
  106.   int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; /* initial capacity: 5/4 of r->size plus REALLOC_SIZE of slack */ \
  107.   int ht_len=0; \
  108.   char* ht_buff=NULL; /* stays NULL when the guard below is false; HT_ADD_END tests it */ \
  109.   if ((opt->getmode & 1) && (ptr>0)) { /* same guard as HT_ADD and HT_ADD_ADR */ \
  110.   ht_buff=(char*) malloct(ht_size); \
  111.   if (ht_buff==NULL) { /* out of memory: print, log through abortLogFmt, then terminate the process */ \
  112.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  113.   XH_uninit; \
  114.   abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \
  115.   exit(1); \
  116.   } \
  117.   ht_buff[0]='\0'; /* start with an empty string */ \
  118.   }
  119. #define HT_ADD_END { /* flush ht_buff to the file named savename unless size and cached digest show it is unchanged, record the digest in the cache, then free ht_buff */ \
  120.   int ok=0; /* set to 1 when the existing file can be kept as is */ \
  121.   if (ht_buff) { \
  122.   INTsys file_len=(INTsys) strlen(ht_buff); /* length taken with strlen, so the output stops at the first NUL in ht_buff */ \
  123.   char digest[32+2];\
  124.   digest[0]='\0';\
  125.   domd5mem(ht_buff,file_len,digest,1); /* presumably fills digest with a 32-character MD5 text form (the test below expects length 32) - confirm in htsmd5 */ \
  126.   if (fsize(antislash(savename))==file_len) { /* consult the cache only when fsize() of the existing file equals the new length */ \
  127.   int mlen;\
  128.   char* mbuff;\
  129.   cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen); /* NOTE(review): mlen and mbuff are uninitialised before this call; assumes cache_readdata always sets both - confirm */ \
  130.   if (mlen) mbuff[mlen]='\0'; /* NOTE(review): mbuff is dereferenced here but only NULL-checked on the next line; mbuff is not released in this macro - confirm ownership */ \
  131.   if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) { /* cached digest equals the new one: keep the file */ \
  132.   ok=1;\
  133.   if ( (opt->debug>1) && (opt->log!=NULL) ) {\
  134.   fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\
  135.   test_flush;\
  136.   }\
  137.   } else {\
  138.   ok=0;\
  139.   } \
  140.   }\
  141.   if (!ok) { /* content differs or could not be compared: (re)write the file */ \
  142.   fp=filecreate(savename); \
  143.   if (fp) { \
  144.   if (file_len>0) {\
  145.   if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { /* short write: logged to opt->errlog only, processing continues */ \
  146.   if (opt->errlog) {   \
  147.   fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s"LF,savename);\
  148.   test_flush;\
  149.   }\
  150.   }\
  151.   }\
  152.   fclose(fp); fp=NULL; \
  153.   if (strnotempty(r->lastmodified)) /* pass the response's lastmodified value on to the saved file */ \
  154.   set_filetime_rfc822(savename,r->lastmodified); \
  155.   usercommand(0,NULL,antislash(savename)); \
  156.   } else { /* filecreate() failed */ \
  157.   if (opt->errlog) { \
  158.   fspc(opt->errlog,"error");\
  159.   fprintf(opt->errlog,"Unable to save file %s"LF,savename);\
  160.   test_flush;\
  161.   }\
  162.   }\
  163.   } else { /* file kept unchanged: only filenote() is called for it */ \
  164.   filenote(savename,NULL); \
  165.   }\
  166.   if (cache->ndx) /* store the new digest under the same "//[HTML-MD5]//" key, in both the rewritten and the unchanged case */ \
  167.     cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
  168.   } \
  169.   freet(ht_buff); ht_buff=NULL; /* executed even when ht_buff is NULL; assumes freet tolerates NULL - confirm */ \
  170.   }
  171. #define HT_ADD_FOP /* expands to nothing; still referenced in htsparse() at the point where the local html file would be opened */
  172.  
  173. // Free up filters[0] so that a new element can be inserted into filters[0]
  174. #define HT_INSERT_FILTERS0 do { /* shift every filter one slot up, empty slot 0, and count one more filter (capped at filter_max) */ \
  175.   int i;\
  176.   if (filptr>0) {\
  177.     for(i=filptr-1;i>=0;i--) { /* copy from the top down so no entry is overwritten before it is moved */ \
  178.       strcpybuff(filters[i+1],filters[i]); /* NOTE(review): writes filters[filptr] before filptr is clamped below; assumes that slot exists even when filptr==filter_max - confirm */ \
  179.     }\
  180.   }\
  181.   strcpybuff(filters[0],""); /* slot 0 is now an empty string for the caller to fill */ \
  182.   filptr++;\
  183.   filptr=minimum(filptr,filter_max);\
  184. } while(0)
  185.  
  186. // COPY IN HTSCORE.C
  187. #define HT_INDEX_END do { \
  188. if (!makeindex_done) { \
  189. if (makeindex_fp) { \
  190.   char tempo[1024]; \
  191.   if (makeindex_links == 1) { \
  192.     sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
  193.   } else \
  194.     tempo[0]='\0'; \
  195.   fprintf(makeindex_fp,template_footer, \
  196.     "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
  197.     tempo \
  198.     ); \
  199.   fflush(makeindex_fp); \
  200.   fclose(makeindex_fp);  /* α ne pas oublier sinon on passe une nuit blanche */  \
  201.   makeindex_fp=NULL; \
  202.   usercommand(0,NULL,fconcat(opt->path_html,"index.html"));  \
  203. } \
  204. } \
  205. makeindex_done=1;    /* ok c'est fait */  \
  206. } while(0)
  207.  
  208. // Recording a link:
  209. // compute the required size: the 3 strings to store (each length rounded to HTS_ALIGN, plus padding bytes for safety)
  210. // then check that enough room is left in the buffer - otherwise allocate a new one
  211. // finally write at the buffer's current address, which is then advanced; the available size is decreased by the same amount
  212. // codebase: if non-empty and a .class file is being stored, it is recorded as the primary path for classes
  213. // FA,FF: former_adr and former_fil, the original link
  214. #if HTS_HASH /* with HTS_HASH enabled the macro expands to nothing and no sav_len is stored */
  215. #define liens_record_sav_len(A) 
  216. #else /* otherwise cache strlen of the sav string in the sav_len field of the link */
  217. #define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav)
  218. #endif
  219.  
  220. // COPY OF HTSCORE.C
  221. #define liens_record(A,F,S,FA,FF) { /* store strings A,F,S (and FA,FF when FA is non-empty) as liens[lien_tot]; lien_tot itself is not incremented here */ \
  222. int notecode=0; /* set when codebase must be stored with this link */ \
  223. int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN, /* sizeof(lien_url) rounded up to a multiple of HTS_ALIGN */ \
  224.   adr_len=strlen(A),\
  225.   fil_len=strlen(F),\
  226.   sav_len=strlen(S),\
  227.   cod_len=0,\
  228.   former_adr_len=strlen(FA),\
  229.   former_fil_len=strlen(FF); \
  230. if (former_adr_len>0) { /* former link kept only when FA is non-empty */ \
  231.   former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; /* padded size: rounded down to HTS_ALIGN, plus two more units */ \
  232.   former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  233. } else former_adr_len=former_fil_len=0;\
  234. if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; /* F ends in ".class" and a codebase is set */ \
  235. cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
  236. adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  237. if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { /* not enough room left in the current block */ \
  238. lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); /* start a fresh zero-filled block of add_tab_alloc bytes; NOTE(review): nothing here checks that add_tab_alloc covers the size just computed - confirm */ \
  239. lien_size=add_tab_alloc; \
  240. if (lien_buffer!=NULL) { \
  241. liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  242. liens[lien_tot]->firstblock=1; /* this record sits at the start of a newly allocated block */ \
  243. } /* NOTE(review): on allocation failure liens[lien_tot] is left unassigned; the NULL test below assumes it was NULL beforehand - confirm */ \
  244. } else { \
  245. liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  246. liens[lien_tot]->firstblock=0; \
  247. } \
  248. if (liens[lien_tot]!=NULL) { \
  249. liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; /* carve each string area out of the block, advancing the cursor and shrinking the remaining size */ \
  250. liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
  251. liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
  252. liens[lien_tot]->cod=NULL; \
  253. if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
  254. if (former_adr_len>0) { /* former_adr/former_fil are assigned only in this case */ \
  255. liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
  256. liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
  257. strcpybuff(liens[lien_tot]->former_adr,FA); \
  258. strcpybuff(liens[lien_tot]->former_fil,FF); \
  259. }\
  260. strcpybuff(liens[lien_tot]->adr,A); \
  261. strcpybuff(liens[lien_tot]->fil,F); \
  262. strcpybuff(liens[lien_tot]->sav,S); \
  263. liens_record_sav_len(liens[lien_tot]); \
  264. hash_write(hashptr,lien_tot); /* pass the new link's index to hash_write */ \
  265. } \
  266. }
  267.  
  268. #define ENGINE_LOAD_CONTEXT() /* declares local working variables from the engine state reachable through str and stre, which must be in scope; no trailing semicolon, the caller supplies it */ \
  269.   lien_url** liens = (lien_url**) str->liens; \
  270.   httrackp* opt = (httrackp*) str->opt; \
  271.   lien_back* back = (lien_back*) str->back; \
  272.   cache_back* cache = (cache_back*) str->cache; \
  273.   hash_struct* hashptr = (hash_struct*) str->hashptr; \
  274.   int back_max = str->back_max; \
  275.   int numero_passe = str->numero_passe; \
  276.   int add_tab_alloc = str->add_tab_alloc; \
  277.   /* values read through pointers held in str; ENGINE_SAVE_CONTEXT writes these four back */ \
  278.   int lien_tot = * ( (int*) (str->lien_tot_) ); \
  279.   int ptr = * ( (int*) (str->ptr_) ); \
  280.   int lien_size = * ( (int*) (str->lien_size_) ); \
  281.   char* lien_buffer = * ( (char**) (str->lien_buffer_) ); \
  282.   /* from here on everything comes from the extended structure stre */ \
  283.   /* filptr, filters and lien_max are written back by ENGINE_SAVE_CONTEXT; r, robots and hash are plain pointers */ \
  284.   htsblk* r = stre->r_; \
  285.   int filptr = *stre->filptr_; \
  286.   char** filters = *stre->filters_; \
  287.   robots_wizard* robots = stre->robots_; \
  288.   hash_struct* hash = stre->hash_; \
  289.   int lien_max = *stre->lien_max_; \
  290.   /* error and store_errpage are written back; codebase and base point directly at stre's storage */ \
  291.   int error = * stre->error_; \
  292.   int store_errpage = * stre->store_errpage_; \
  293.   char* codebase = stre->codebase; \
  294.   char* base = stre->base; \
  295.   /* index.html generation state; done, fp and links are written back by ENGINE_SAVE_CONTEXT */ \
  296.   int makeindex_done = *stre->makeindex_done_; \
  297.   FILE* makeindex_fp = *stre->makeindex_fp_; \
  298.   int makeindex_links = *stre->makeindex_links_; \
  299.   char* makeindex_firstlink = stre->makeindex_firstlink_; \
  300.   /* templates passed as format strings to fprintf when writing index.html */ \
  301.   char *template_header = stre->template_header_; \
  302.   char *template_body = stre->template_body_; \
  303.   char *template_footer = stre->template_footer_; \
  304.   /* stat_fragment is written back; makestat_time and makestat_fp are copies that are not */ \
  305.   LLint stat_fragment = *stre->stat_fragment_; \
  306.   TStamp makestat_time = stre->makestat_time; \
  307.   FILE* makestat_fp = stre->makestat_fp
  308.  
  309. #define ENGINE_SAVE_CONTEXT() /* counterpart of ENGINE_LOAD_CONTEXT; no trailing semicolon, the caller supplies it */ \
  310.   /* Apply changes: store the modified locals back through the pointers held in str and stre */ \
  311.    * ( (int*) (str->lien_tot_) ) = lien_tot; \
  312.    * ( (int*) (str->ptr_) ) = ptr; \
  313.    * ( (int*) (str->lien_size_) ) = lien_size; \
  314.    * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
  315.    /* error flags, filter list and link limit */ \
  316.    * stre->error_ = error; \
  317.    * stre->store_errpage_ = store_errpage; \
  318.    * stre->filptr_ = filptr; \
  319.    * stre->filters_ = filters; \
  320.    * stre->lien_max_ = lien_max; \
  321.    /* index.html generation state */ \
  322.    *stre->makeindex_done_ = makeindex_done; \
  323.    *stre->makeindex_fp_ = makeindex_fp; \
  324.    *stre->makeindex_links_ = makeindex_links; \
  325.    /* statistics */ \
  326.    *stre->stat_fragment_ = stat_fragment
  327.  
  328.  
  329.  
  330. /* Main parser */
  331. int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  332.   /* Load engine variables */
  333.   ENGINE_LOAD_CONTEXT();
  334.   
  335. #if HTS_ANALYSTE
  336.   if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) {
  337. #endif          
  338.     FILE* fp=NULL;      // fichier Θcrit localement 
  339.     char* adr=r->adr;    // pointeur (on parcourt)
  340.     char* lastsaved;    // adresse du dernier octet sauvΘ + 1
  341.     if ( (opt->debug>1) && (opt->log!=NULL) ) {
  342.       fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush;
  343.     }
  344.     
  345.     
  346.     // Indexing!
  347. #if HTS_MAKE_KEYWORD_INDEX
  348.     if (opt->kindex) {
  349.       if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) {
  350.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  351.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush;
  352.         }
  353.       } else {
  354.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  355.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush;
  356.         }
  357.       }
  358.     }
  359. #endif
  360.     
  361.     // Now, parsing
  362.     if ((opt->getmode & 1) && (ptr>0)) {  // rΘcupΘrer les html sur disque       
  363.       // crΘer le fichier html local
  364.       HT_ADD_FOP;   // Θcrire peu α peu le fichier
  365.     }
  366.     
  367.     if (!error) {
  368.       int detect_title=0;  // dΘtection  du title
  369.       //
  370.       char* in_media=NULL; // in other media type (real media and so..)
  371.       int intag=0;         // on est dans un tag
  372.       int incomment=0;     // dans un <!--
  373.       int inscript=0;      // dans un scipt pour applets javascript)
  374.       int inscript_tag=0;  // on est dans un <body onLoad="... terminΘ par >
  375.       char inscript_tag_lastc='\0';  
  376.       // terminaison (" ou ') du "<body onLoad=.."
  377.       int inscriptgen=0;     // on est dans un code gΘnΘrant, ex aprΦs obj.write("..
  378.       char scriptgen_q='\0'; // caractΦre faisant office de guillemet (' ou ")
  379.       int no_esc_utf=0;      // ne pas echapper chars > 127
  380.       int nofollow=0;        // ne pas scanner
  381.       //
  382.       int parseall_lastc='\0';    // dernier caractΦre parsΘ pour parseall
  383.       int parseall_incomment=0;   // dans un /* */ (exemple: a = /* URL */ "img.gif";)
  384.       //
  385.       char* intag_start=adr;
  386.       char* intag_startattr=NULL;
  387.       int intag_start_valid=0;
  388.       HT_ADD_START;    // dΘbuter
  389.       
  390.       
  391.       /* statistics */
  392.       if ((opt->getmode & 1) && (ptr>0)) { 
  393.       /*
  394.       HTS_STAT.stat_files++;
  395.       HTS_STAT.stat_bytes+=r->size;
  396.         */
  397.       }
  398.       
  399.       /* Primary list or URLs */
  400.       if (ptr == 0) {
  401.         intag=1;
  402.         intag_start_valid=0;
  403.       }
  404.       /* Check is the file is a .js file */
  405.       else if (
  406.         (strfield2(r->contenttype,"application/x-javascript")!=0)
  407.         || (strfield2(r->contenttype,"text/css")!=0)
  408.         ) {      /* JavaScript js file */
  409.         inscript=1;
  410.         intag=1;     // because aprΦs <script> on y est .. - pas utile
  411.         intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  412.         if ((opt->debug>1) && (opt->log!=NULL)) {
  413.           fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
  414.         }
  415.       }
  416.       /* Or a real audio */
  417.       else if (strfield2(r->contenttype,"audio/x-pn-realaudio")!=0) {      /* realaudio link file */
  418.         inscript=intag=1;
  419.         intag_start_valid=0;
  420.         in_media="RAM";       // real media!
  421.       }
  422.       // Detect UTF8 format
  423.       if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) {
  424.         no_esc_utf=1;
  425.       } else {
  426.         no_esc_utf=0;
  427.       }
  428.       // Hack to prevent any problems with ram files of other files
  429.       * ( r->adr + r->size ) = '\0';
  430.       
  431.       
  432.       // ------------------------------------------------------------
  433.       // analyser ce qu'il y a en mΘmoire (fichier html)
  434.       // on scanne les balises
  435.       // ------------------------------------------------------------
  436. #if HTS_ANALYSTE
  437.       _hts_in_html_done=0;     // 0% scannΘs
  438.       _hts_cancel=0;           // pas de cancel
  439.       _hts_in_html_parsing=1;  // flag pour indiquer un parsing
  440. #endif
  441.       base[0]='\0';    // effacer base-href
  442.       lastsaved=adr;
  443.       do {
  444.         int p=0;
  445.         int valid_p=0;      // force to take p even if == 0
  446.         int ending_p='\0';  // ending quote?
  447.         error=0;
  448.         
  449.         /* Hack to avoid NULL char problems with C syntax */
  450.         /* Yes, some bogus HTML pages can embed null chars
  451.         and therefore can not be properly handled if this hack is not done
  452.         */
  453.         if ( ! (*adr) ) {
  454.           if ( ((int) (adr - r->adr)) < r->size)
  455.             *adr=' ';
  456.         }
  457.         
  458.         
  459.         
  460.         /*
  461.         index.html built here
  462.         */
  463.         // Construction index.html (sommaire)
  464.         // Avant de tester les a href,
  465.         // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
  466.         if (!makeindex_done) {  // autoriation d'Θcrire un index
  467.           if (!detect_title) {
  468.             if (opt->depth == liens[ptr]->depth) {    // on note toujours les premiers liens
  469.               if (!in_media) {
  470.                 if (opt->makeindex && (ptr>0)) {
  471.                   if (opt->getmode & 1) {  // autorisation d'Θcrire
  472.                     p=strfield(adr,"title");  
  473.                     if (p) {
  474.                       if (*(adr-1)=='/') p=0;    // /title
  475.                     } else {
  476.                       if (strfield(adr,"/html"))
  477.                         p=-1;                    // noter, mais sans titre
  478.                       else if (strfield(adr,"body"))
  479.                         p=-1;                    // noter, mais sans titre
  480.                       else if ( ((int) (adr - r->adr) ) >= (r->size-1) )
  481.                         p=-1;                    // noter, mais sans titre
  482.                       else if ( (int) (adr - r->adr) >= r->size - 2)   // we got to hurry
  483.                         p=-1; // xxc xxc xxc
  484.                     }
  485.                   } else
  486.                     p=0;
  487.                   
  488.                   if (p) {    // ok center                            
  489.                     if (makeindex_fp==NULL) {
  490.                       verif_backblue(opt->path_html);    // gΘnΘrer gif
  491.                       makeindex_fp=filecreate(fconcat(opt->path_html,"index.html"));
  492.                       if (makeindex_fp!=NULL) {
  493.                         
  494.                         // Header
  495.                         fprintf(makeindex_fp,template_header,
  496.                           "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
  497.                           );
  498.                         
  499.                       } else makeindex_done=-1;    // fait, erreur
  500.                     }
  501.                     
  502.                     if (makeindex_fp!=NULL) {
  503.                       char tempo[HTS_URLMAXSIZE*2];
  504.                       char s[HTS_URLMAXSIZE*2];
  505.                       char* a=NULL;
  506.                       char* b=NULL;
  507.                       s[0]='\0';
  508.                       if (p>0) {
  509.                         a=strchr(adr,'>');
  510.                         if (a!=NULL) {
  511.                           a++;
  512.                           while(is_space(*a)) a++;    // sauter espaces & co
  513.                           b=strchr(a,'<');   // prochain tag
  514.                         }
  515.                       }
  516.                       if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) {
  517.                         detect_title=1;      // ok dΘtectΘ pour cette page!
  518.                         makeindex_links++;   // un de plus
  519.                         strcpybuff(makeindex_firstlink,tempo);
  520.                         //
  521.                         if ((b==a) || (a==NULL) || (b==NULL)) {    // pas de titre
  522.                           strcpybuff(s,tempo);
  523.                         } else if ((b-a)<256) {
  524.                           b--;
  525.                           while(is_space(*b)) b--;
  526.                           strncpy(s,a,b-a+1);
  527.                           *(s+(b-a)+1)='\0';
  528.                         }
  529.                         
  530.                         // Body
  531.                         fprintf(makeindex_fp,template_body,
  532.                           tempo,
  533.                           s
  534.                           );
  535.                         
  536.                       }
  537.                     }
  538.                   }
  539.                 }
  540.               }
  541.               
  542.             } else if (liens[ptr]->depth<opt->depth) {   // on a sautΘ level1+1 et level1
  543.               HT_INDEX_END;
  544.             }
  545.           } // if (opt->makeindex)
  546.         }
  547.         // FIN Construction index.html (sommaire)
  548.         /*
  549.         end -- index.html built here
  550.         */
  551.         
  552.         
  553.         
  554.         /* Parse */
  555.         if (
  556.           (*adr=='<')    /* No starting tag */
  557.           && (!inscript)    /* Not in (java)script */
  558.           && (!incomment)   /* Not in comment (<!--) */
  559.           ) { 
  560.           intag=1;
  561.           parseall_incomment=0;
  562.           //inquote=0;  // effacer quote
  563.           intag_start=adr; intag_start_valid=1;
  564.           codebase[0]='\0';    // effacer Θventuel codebase
  565.           
  566.           if (opt->getmode & 1) {  // sauver html
  567.             p=strfield(adr,"</html");
  568.             if (p==0) p=strfield(adr,"<head>");
  569.             // if (p==0) p=strfield(adr,"<doctype");
  570.             if (p) {
  571.               if (strnotempty(opt->footer)) {
  572.                 char tempo[1024+HTS_URLMAXSIZE*2];
  573.                 char gmttime[256];
  574.                 char* eol="\n";
  575.                 tempo[0]='\0';
  576.                 if (strchr(r->adr,'\r'))
  577.                   eol="\r\n";
  578.                 time_gmt_rfc822(gmttime);
  579.                 strcatbuff(tempo,eol);
  580.                 sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
  581.                 strcatbuff(tempo,eol);
  582.                 //fwrite(tempo,1,strlen(tempo),fp);
  583.                 HT_ADD(tempo);
  584.               }
  585.             }
  586.           }        
  587.           
  588.           // Θliminer les <!-- (commentaires) : intag dΘvalidΘ
  589.           if (*(adr+1)=='!')
  590.             if (*(adr+2)=='-')
  591.               if (*(adr+3)=='-') {
  592.                 intag=0;
  593.                 incomment=1;
  594.                 intag_start_valid=0;
  595.               }
  596.               
  597.         }
  598.         else if (
  599.           (*adr=='>')                        /* ending tag */
  600.           && ( (!inscript) || (inscript_tag) )  /* and in tag (or in script) */
  601.           ) {
  602.           if (inscript_tag) {
  603.             inscript_tag=inscript=0;
  604.             intag=0;
  605.             incomment=0;
  606.             intag_start_valid=0;
  607.           } else if (!incomment) {
  608.             intag=0; //inquote=0;
  609.             
  610.             // entrΘe dans du javascript?
  611.             // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
  612.             //if (!inscript) {  // sinon on est dans un obj.write("..
  613.             if ((intag_start_valid) && 
  614.               (
  615.               check_tag(intag_start,"script")
  616.               ||
  617.               check_tag(intag_start,"style")
  618.               )
  619.               ) {
  620.               char* a=intag_start;    // <
  621.               // ** while(is_realspace(*(--a)));
  622.               if (*a=='<') {  // s√r que c'est un tag?
  623.                 inscript=1;
  624.                 intag=1;     // because aprΦs <script> on y est .. - pas utile
  625.                 intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  626.               }
  627.             }
  628.           } else {                               /* end of comment? */
  629.             // vΘrifier fermeture correcte
  630.             if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
  631.               intag=0;
  632.               incomment=0;
  633.               intag_start_valid=0;
  634.             }
  635. #if GT_ENDS_COMMENT
  636.             /* wrong comment ending */
  637.             else {
  638.             /* check if correct ending does not exists
  639.             <!-- foo > example <!-- bar > is sometimes accepted by browsers
  640.             when no --> is used somewhere else.. darn those browsers are dirty
  641.               */
  642.               if (!strstr(adr,"-->")) {
  643.                 intag=0;
  644.                 incomment=0;
  645.                 intag_start_valid=0;
  646.               }
  647.             }
  648. #endif
  649.           }
  650.           //}
  651.         }
  652.         //else if (*adr==34) {
  653.         //  inquote=(inquote?0:1);
  654.         //}
  655.         else if (intag || inscript) {    // nous sommes dans un tag/commentaire, tester si on recoit un tag
  656.           int p_type=0;
  657.           int p_nocatch=0;
  658.           int p_searchMETAURL=0;  // chercher ..URL=<url>
  659.           int add_class=0;        // ajouter .class
  660.           int add_class_dots_to_patch=0;   // number of '.' in code="x.y.z<realname>"
  661.           char* p_flush=NULL;
  662.           
  663.           
  664.           // ------------------------------------------------------------
  665.           // parsing ΘvolΘ
  666.           // ------------------------------------------------------------
  667.           if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) {  // sinon pas la peine de tester..
  668.             
  669.             
  670.                                                                                                  /* caractΦre de terminaison pour "miniparsing" javascript=.. ? 
  671.             (ex: <a href="javascript:()" action="foo"> ) */
  672.             if (inscript_tag) {
  673.               if (inscript_tag_lastc) {
  674.                 if (*adr == inscript_tag_lastc) {
  675.                   /* sortir */
  676.                   inscript_tag=inscript=0;
  677.                   incomment=0;
  678.                 }
  679.               }
  680.             }
  681.             
  682.             
  683.             // Note:
  684.             // Certaines pages ne respectent pas le html
  685.             // notamment les guillements ne sont pas fixΘs
  686.             // Nous sommes dans un tag, donc on peut faire un test plus
  687.             // large pour pouvoi prendre en compte ces particularitΘs
  688.             
  689.             // α vΘrifier: ACTION, CODEBASE, VRML
  690.             
  691.             if (in_media) {
  692.               if (strcmp(in_media,"RAM")==0) { // real media
  693.                 p=0;
  694.                 valid_p=1;
  695.               }
  696.             } else if (ptr>0) {        /* pas premiΦre page 0 (primary) */
  697.               p=0;  // saut pour le nom de fichier: adresse nom fichier=adr+p
  698.               
  699.               // ------------------------------
  700.               // dΘtection d'Θcriture JavaScript.
  701.               // osons les obj.write et les obj.href=.. ! osons!
  702.               // note: inscript==1 donc on sautera aprΦs les \"
  703.               if (inscript) {
  704.                 if (inscriptgen) {          // on est dΘja dans un objet gΘnΘrant..
  705.                   if (*adr==scriptgen_q) {  // fermeture des " ou '
  706.                     if (*(adr-1)!='\\') {   // non
  707.                       inscriptgen=0;        // ok parsing terminΘ
  708.                     }
  709.                   }
  710.                 } else {
  711.                   char* a=NULL;
  712.                   char check_this_fking_line=0;  // parsing code javascript..
  713.                   char must_be_terminated=0;     // caractΦre obligatoire de terminaison!
  714.                   int token_size;
  715.                   if (!(token_size=strfield(adr,".writeln"))) // dΘtection ...objet.write[ln]("code html")...
  716.                     token_size=strfield(adr,".write");
  717.                   if (token_size) {
  718.                     a=adr+token_size;
  719.                     while(is_realspace(*a)) a++; // sauter espaces
  720.                     if (*a=='(') {  // dΘbut parenthΦse
  721.                       check_this_fking_line=2;  // α parser!
  722.                       must_be_terminated=')';
  723.                       a++;  // sauter (
  724.                     }
  725.                   }
  726.                   // euhh ??? ???
  727.                   /* else if (strfield(adr,".href")) {  // dΘtection ...objet.href="...
  728.                   a=adr+5;
  729.                   while(is_realspace(*a)) a++; // sauter espaces
  730.                   if (*a=='=') {  // ohh un Θgal
  731.                   check_this_fking_line=1;  // α noter!
  732.                   must_be_terminated=';';   // et si t'as oubliΘ le ; tu sais pas coder
  733.                   a++;   // sauter =
  734.                   }
  735.                   
  736.                 }*/
  737.                   
  738.                   // on a un truc du genre instruction"code gΘnΘrΘ" dont on parse le code
  739.                   if (check_this_fking_line) {
  740.                     while(is_realspace(*a)) a++;
  741.                     if ((*a=='\'') || (*a=='"')) {  // dΘpart de '' ou ""
  742.                       char *b;
  743.                       int ex=0;
  744.                       scriptgen_q=*a;    // quote
  745.                       b=a+1;      // dΘpart de la chaεne
  746.                       // vΘrifier forme ("code") et pas ("code"+var), ingΘrable
  747.                       do {
  748.                         a++;  // caractΦre suivant
  749.                         if (*a==scriptgen_q) if (*(a-1)!='\\')  // quote non slash
  750.                           ex=1;            // sortie
  751.                         if ((*a==10) || (*a==13))
  752.                           ex=1;
  753.                       } while(!ex);
  754.                       if (*a==scriptgen_q) {  // fin du quote
  755.                         a++;
  756.                         while(is_realspace(*a)) a++;
  757.                         if (*a==must_be_terminated) {  // parenthΦse fermante: ("..")
  758.                           
  759.                           // bon, on doit parser une ligne javascript
  760.                           // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
  761.                           // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
  762.                           // et le moteur se débrouillera ensuite tout seul comme un grand
  763.                           // 2) si check==2 c'est un peu plus tordu car là on génère du
  764.                           // code html au sein de code javascript au sein de code html
  765.                           // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
  766.                           // on devra parser les instructions standard comme <a href etc
  767.                           // NOTE: le code javascript autogénéré n'est pas pris en compte!!
  768.                           // (et ne marche pas dans 50% des cas de toute façon!)
  769.                           if (check_this_fking_line==1) {
  770.                             p=(int) (b - adr);    // calculer saut!
  771.                           } else {
  772.                             inscriptgen=1;        // SCRIPTGEN actif
  773.                             adr=b;                // jump
  774.                           }
  775.                           
  776.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  777.                             char str[512];
  778.                             str[0]='\0';
  779.                             strncatbuff(str,b,minimum((int) (a - b + 1), 32));
  780.                             fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
  781.                           }
  782.                         }
  783.                         
  784.                       }
  785.                       
  786.                     }
  787.                     
  788.                     
  789.                   }
  790.                 }
  791.               }
  792.               // fin détection code générant javascript vers html
  793.               // ------------------------------
  794.               
  795.               
  796.               // analyse proprement dite, A HREF=.. etc..
  797.               if (!p) {
  798.                 // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
  799.                 if ((intag && (!inscript)) || inscriptgen) {
  800.                   if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) {   // <tag < tag etc
  801.                     // <A HREF=.. pour les liens HTML
  802.                     p=rech_tageq(adr,"href");
  803.                     if (p) {    // href.. tester si c'est une bas href!
  804.                       if ((intag_start_valid) && check_tag(intag_start,"base")) {  // oui!
  805.                         // ** note: base href et codebase ne font pas bon mΘnage..
  806.                         p_type=2;    // c'est un chemin
  807.                       }
  808.                     }
  809.                     
  810.                     /* Tags supplΘmentaires α vΘrifier (<img src=..> etc) */
  811.                     if (p==0) {
  812.                       int i=0;
  813.                       while( (p==0) && (strnotempty(hts_detect[i])) ) {
  814.                         p=rech_tageq(adr,hts_detect[i]);
  815.                         i++;
  816.                       }
  817.                     }
  818.                     
  819.                     /* Tags supplΘmentaires en dΘbut α vΘrifier (<object .. hotspot1=..> etc) */
  820.                     if (p==0) {
  821.                       int i=0;
  822.                       while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
  823.                         p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
  824.                         i++;
  825.                       }
  826.                     }
  827.                     
  828.                     /* Tags supplΘmentaires α vΘrifier : URL=.. */
  829.                     if (p==0) {
  830.                       int i=0;
  831.                       while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
  832.                         p=rech_tageq(adr,hts_detectURL[i]);
  833.                         i++;
  834.                       }
  835.                       if (p)
  836.                         p_searchMETAURL=1;
  837.                     }
  838.                     
  839.                     /* Tags supplΘmentaires α vΘrifier, mais α ne pas capturer */
  840.                     if (p==0) {
  841.                       int i=0;
  842.                       while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
  843.                         p=rech_tageq(adr,hts_detectandleave[i]);
  844.                         i++;
  845.                       }
  846.                       if (p)
  847.                         p_nocatch=1;      /* ne pas rechercher */
  848.                     }
  849.                     
  850.                     /* EvΘnements */
  851.                     if (p==0) {
  852.                       int i=0;
  853.                       /* dΘtection onLoad etc */
  854.                       while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
  855.                         p=rech_tageq(adr,hts_detect_js[i]);
  856.                         i++;
  857.                       }
  858.                       /* non dΘtectΘ - dΘtecter Θgalement les onXxxxx= */
  859.                       if (p==0) {
  860.                         if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
  861.                           p=0;
  862.                           while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
  863.                           if (p<64) {
  864.                             while(is_space(adr[p])) p++;
  865.                             if (adr[p]=='=')
  866.                               p++;
  867.                             else p=0;
  868.                           } else p=0;
  869.                         }
  870.                       }
  871.                       /* OK, ΘvΘnement repΘrΘ */
  872.                       if (p) {
  873.                         inscript_tag_lastc=*(adr+p);     /* α attendre α la fin */
  874.                         adr+=p;     /* saut */
  875.                                     /*
  876.                                     On est dΘsormais dans du code javascript
  877.                         */
  878.                         inscript_tag=inscript=1;
  879.                       }
  880.                       p=0;        /* quoi qu'il arrive, ne rien dΘmarrer ici */
  881.                     }
  882.                     
  883.                     // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) α faire]
  884.                     if (p==0) {
  885.                       p=rech_tageq(adr,"code");
  886.                       if (p) {
  887.                         if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  888.                           p_type=-1;  // juste le nom de fichier+dossier, Θcire avant codebase 
  889.                           add_class=1;   // ajouter .class au besoin                         
  890.                           
  891.                           // vérifier qu'il n'y a pas de codebase APRES
  892.                           // sinon on swappe les deux.
  893.                           // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
  894.                           
  895.                           {
  896.                             char *a;
  897.                             a=adr;
  898.                             while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
  899.                             if (rech_tageq(a,"codebase")) {  // banzai! codebase=
  900.                               char* b;
  901.                               b=strchr(a,'>');
  902.                               if (b) {
  903.                                 if (((int) (b - adr)) < 1000) {    // au total < 1Ko
  904.                                   char tempo[HTS_URLMAXSIZE*2];
  905.                                   tempo[0]='\0';
  906.                                   strncatbuff(tempo,a,(int) (b - a) );
  907.                                   strcatbuff( tempo," ");
  908.                                   strncatbuff(tempo,adr,(int) (a - adr - 1));
  909.                                   // Θventuellement remplire par des espaces pour avoir juste la taille
  910.                                   while((int) strlen(tempo)<((int) (b - adr)))
  911.                                     strcatbuff(tempo," ");
  912.                                   // pas d'erreur?
  913.                                   if ((int) strlen(tempo) == ((int) (b - adr) )) {
  914.                                     strncpy(adr,tempo,strlen(tempo));   // PAS d'octet nul α la fin!
  915.                                     p=0;    // DEVALIDER!!
  916.                                     p_type=0;
  917.                                     add_class=0;
  918.                                   }
  919.                                 }
  920.                               }
  921.                             }
  922.                           }
  923.                           
  924.                         }
  925.                       }
  926.                     }
  927.                     
  928.                     // liens α patcher mais pas α charger (ex: codebase)
  929.                     if (p==0) {  // note: si non chargΘ (ex: ignorer .class) patchΘ tout de mΩme
  930.                       p=rech_tageq(adr,"codebase");
  931.                       if (p) {
  932.                         if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  933.                           p_type=-2;
  934.                         } else p=-1;   // ne plus chercher
  935.                       }
  936.                     }
  937.                     
  938.                     
  939.                     // Meta tags pour robots
  940.                     if (p==0) {
  941.                       if (opt->robots) {
  942.                         if ((intag_start_valid) && check_tag(intag_start,"meta")) {
  943.                           if (rech_tageq(adr,"name")) {    // name=robots.txt
  944.                             char tempo[1100];
  945.                             char* a;
  946.                             tempo[0]='\0';
  947.                             a=strchr(adr,'>');
  948. #if DEBUG_ROBOTS
  949.                             printf("robots.txt meta tag detected\n");
  950. #endif
  951.                             if (a) {
  952.                               if (((int) (a - adr)) < 999 ) {
  953.                                 strncatbuff(tempo,adr,(int) (a - adr));
  954.                                 if (strstrcase(tempo,"content")) {
  955.                                   if (strstrcase(tempo,"robots")) {
  956.                                     if (strstrcase(tempo,"nofollow")) {
  957. #if DEBUG_ROBOTS
  958.                                       printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
  959. #endif
  960.                                       nofollow=1;       // NE PLUS suivre liens dans cette page
  961.                                       if (opt->errlog) {
  962.                                         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
  963.                                         test_flush;
  964.                                       }
  965.                                     }
  966.                                   }
  967.                                 }
  968.                               }
  969.                             }
  970.                           }
  971.                         }
  972.                       }
  973.                     }
  974.                     
  975.                     // entrΘe dans une applet javascript
  976.                     /*if (!inscript) {  // sinon on est dans un obj.write("..
  977.                     if (p==0)
  978.                     if (rech_sampletag(adr,"script"))
  979.                     if (check_tag(intag_start,"script")) {
  980.                     inscript=1;
  981.                     }
  982.                         }*/
  983.                     
  984.                     // Ici on procède à une analyse du code javascript pour tenter de récupérer
  985.                     // certains fichiers évidents.
  986.                     // C'est devenu obligatoire vu le nombre de pages qui intègrent
  987.                     // des images réactives par exemple
  988.                 }
  989.               } else if (inscript) {
  990.                 if (
  991.                   (
  992.                   (strfield(adr,"/script"))
  993.                   ||
  994.                   (strfield(adr,"/style"))
  995.                   )
  996.                   ) {
  997.                   char* a=adr;
  998.                   //while(is_realspace(*(--a)));
  999.                   while( is_realspace(*a) ) a--;
  1000.                   a--;
  1001.                   if (*a=='<') {  // s√r que c'est un tag?
  1002.                     inscript=0;
  1003.                   }
  1004.                 } else {
  1005.                 /*
  1006.                 Script Analyzing - different types supported:
  1007.                 foo="url"
  1008.                 foo("url") or foo(url)
  1009.                 foo "url"
  1010.                   */
  1011.                   int nc;
  1012.                   char  expected     = '=';          // caractΦre attendu aprΦs
  1013.                   char* expected_end = ";";
  1014.                   int can_avoid_quotes=0;
  1015.                   char quotes_replacement='\0';
  1016.                   if (inscript_tag)
  1017.                     expected_end=";\"\'";            // voir a href="javascript:doc.location='foo'"
  1018.                   nc = strfield(adr,".src");  // nom.src="image";
  1019.                   if (!nc) nc = strfield(adr,".location");  // document.location="doc"
  1020.                   if (!nc) nc = strfield(adr,".href");  // document.location="doc"
  1021.                   if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
  1022.                     expected='(';    // parenthΦse
  1023.                     expected_end="),";  // fin: virgule ou parenthΦse
  1024.                   }
  1025.                   if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
  1026.                     expected='(';    // parenthΦse
  1027.                     expected_end=")";  // fin: parenthΦse
  1028.                   }
  1029.                   if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
  1030.                     expected='(';    // parenthΦse
  1031.                     expected_end=")";  // fin: parenthΦse
  1032.                   }
  1033.                   if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(adr[-1])) ) { // url(url)
  1034.                     expected='(';    // parenthΦse
  1035.                     expected_end=")";  // fin: parenthΦse
  1036.                     can_avoid_quotes=1;
  1037.                     quotes_replacement=')';
  1038.                   }
  1039.                   if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
  1040.                     if (is_space(*(adr+nc))) {
  1041.                       expected=0;    // no char expected
  1042.                     } else
  1043.                       nc=0;
  1044.                   }
  1045.                   if (nc) {
  1046.                     char *a;
  1047.                     a=adr+nc;
  1048.                     while(is_realspace(*a)) a++;
  1049.                     if ((*a == expected) || (!expected)) {
  1050.                       if (expected)
  1051.                         a++;
  1052.                       while(is_realspace(*a)) a++;
  1053.                       if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
  1054.                         char *b,*c;
  1055.                         int ndelim=1;
  1056.                         if ((*a==34) || (*a=='\''))
  1057.                           a++;
  1058.                         else
  1059.                           ndelim=0;
  1060.                         b=a;
  1061.                         if (ndelim) {
  1062.                           while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
  1063.                         }
  1064.                         else {
  1065.                           while((*b != quotes_replacement) && (*b!='\0')) b++;
  1066.                         }
  1067.                         c=b--; c+=ndelim;
  1068.                         while(*c==' ') c++;
  1069.                         if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
  1070.                           c-=(ndelim+1);
  1071.                           if ((int) (c - a + 1)) {
  1072.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  1073.                               char str[512];
  1074.                               str[0]='\0';
  1075.                               strncatbuff(str,a,minimum((int) (c - a + 1),32));
  1076.                               fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
  1077.                             }
  1078.                             p=(int) (a - adr);    // p non nul: TRAITER CHAINE COMME FICHIER
  1079.                             if (can_avoid_quotes) {
  1080.                               ending_p=quotes_replacement;
  1081.                             }
  1082.                           }
  1083.                         }
  1084.                         
  1085.                         
  1086.                       }
  1087.                     }
  1088.                   }
  1089.                   
  1090.                 }
  1091.               }
  1092.             }
  1093.             
  1094.           } else {      // ptr == 0
  1095.             //p=rech_tageq(adr,"primary");    // lien primaire, yeah
  1096.             p=0;          // No stupid tag anymore, raw link
  1097.             valid_p=1;    // Valid even if p==0
  1098.             while ((adr[p] == '\r') || (adr[p] == '\n'))
  1099.               p++;
  1100.             //can_avoid_quotes=1;
  1101.             ending_p='\r';
  1102.           }       
  1103.           
  1104.         } else if (isspace((unsigned char)*adr)) {
  1105.           intag_startattr=adr+1;        // attribute in tag (for dirty parsing)
  1106.         }
  1107.         
  1108.         
  1109.         // ------------------------------------------------------------
  1110.         // dernier recours - parsing "sale" : détection systématique des .gif, etc.
  1111.         // risque: générer de faux fichiers parasites
  1112.         // fix: ne parse plus dans les commentaires
  1113.         // ------------------------------------------------------------
  1114.         if ( (opt->parseall) && (ptr>0) && (!in_media) ) {           // option parsing "brut"
  1115.           int incomment_justquit=0;
  1116.           if (!is_realspace(*adr)) {
  1117.             int noparse=0;
  1118.             
  1119.             // Gestion des /* */
  1120.             if (inscript) {
  1121.               if (parseall_incomment) {
  1122.                 if ((*adr=='/') && (*(adr-1)=='*'))
  1123.                   parseall_incomment=0;
  1124.                 incomment_justquit=1;       // ne pas noter dernier caractΦre
  1125.               } else {
  1126.                 if ((*adr=='/') && (*(adr+1)=='*'))
  1127.                   parseall_incomment=1;
  1128.               }
  1129.             } else
  1130.               parseall_incomment=0;
  1131.             
  1132.             /* vΘrifier que l'on est pas dans un <!-- --> pur */
  1133.             if ( (!intag) && (incomment) && (!inscript))
  1134.               noparse=1;        /* commentaire */
  1135.             
  1136.             // recherche d'URLs
  1137.             if ((!parseall_incomment) && (!noparse)) {
  1138.               if (!p) {                   // non dΘja trouvΘ
  1139.                 if (adr != r->adr) {     // >1 caractΦre
  1140.                   // scanner les chaines
  1141.                   if ((*adr == '\"') || (*adr=='\'')) {         // "xx.gif" 'xx.gif'
  1142.                     if (strchr("=(,",parseall_lastc)) {    // exemple: a="img.gif..
  1143.                       char *a=adr;
  1144.                       char stop=*adr;  // " ou '
  1145.                       int count=0;
  1146.                       
  1147.                       // sauter caractΦres
  1148.                       a++;
  1149.                       // copier
  1150.                       while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
  1151.                       
  1152.                       // ok chaine terminΘe par " ou '
  1153.                       if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
  1154.                         char c;
  1155.                         char* aend;
  1156.                         //
  1157.                         aend=a;     // sauver dΘbut
  1158.                         a++;
  1159.                         while(is_taborspace(*a)) a++;
  1160.                         c=*a;
  1161.                         if (strchr("),;>/+\r\n",c)) {     // exemple: ..img.gif";
  1162.                           // le / est pour funct("img.gif" /* URL */);
  1163.                           char tempo[HTS_URLMAXSIZE*2];
  1164.                           char type[256];
  1165.                           int url_ok=0;      // url valide?
  1166.                           tempo[0]='\0'; type[0]='\0';
  1167.                           //
  1168.                           strncatbuff(tempo,adr+1,count);
  1169.                           //
  1170.                           if ((!strchr(tempo,' ')) || inscript) {   // espace dedans: mΘfiance! (sauf dans code javascript)
  1171.                             int invalid_url=0;
  1172.                             
  1173.                             // escape                              
  1174.                             unescape_amp(tempo);
  1175.                             
  1176.                             // Couper au # ou ? Θventuel
  1177.                             {
  1178.                               char* a=strchr(tempo,'#');
  1179.                               if (a)
  1180.                                 *a='\0';
  1181.                               a=strchr(tempo,'?');
  1182.                               if (a)
  1183.                                 *a='\0';
  1184.                             }
  1185.                             
  1186.                             // vΘrifier qu'il n'y a pas de caractΦres spΘciaux
  1187.                             if (!strnotempty(tempo))
  1188.                               invalid_url=1;
  1189.                             else if (strchr(tempo,'*')
  1190.                               || strchr(tempo,'<')
  1191.                               || strchr(tempo,'>')
  1192.                               || strchr(tempo,',')    /* list of files ? */
  1193.                               || strchr(tempo,'\"')    /* potential parsing bug */
  1194.                               || strchr(tempo,'\'')    /* potential parsing bug */
  1195.                               )
  1196.                               invalid_url=1;
  1197.                             else if (tempo[0] == '.')   // ".gif"
  1198.                               invalid_url=1;
  1199.                             
  1200.                             /* non invalide? */
  1201.                             if (!invalid_url) {
  1202.                               // Un plus α la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
  1203.                               if (c!='+') {    // PAS de plus α la fin
  1204.                                 char* a;
  1205.                                 // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)                                  
  1206.                                 //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0))  // ok pas de problΦme
  1207.                                 if (
  1208.                                   (strfield(tempo,"http:")) 
  1209.                                   || (strfield(tempo,"ftp:"))
  1210. #if HTS_USEOPENSSL
  1211.                                   || (
  1212.                                   SSL_is_available &&
  1213.                                   (strfield(tempo,"https:"))
  1214.                                   )
  1215. #endif
  1216.                                   )  // ok pas de problΦme
  1217.                                   url_ok=1;
  1218.                                 else if (tempo[strlen(tempo)-1]=='/') {        // un slash: ok..
  1219.                                   if (inscript)   // sinon si pas javascript, mΘfiance (rΘpertoire style base?)
  1220.                                     url_ok=1;
  1221.                                 } else if ((a=strchr(tempo,'/'))) {        // un slash: ok..
  1222.                                   if (inscript) {    // sinon si pas javascript, mΘfiance (style "text/css")
  1223.                                     if (strchr(a+1,'/'))  // un seul / : abandon (STYLE type='text/css')
  1224.                                       url_ok=1;
  1225.                                   }
  1226.                                 }
  1227.                               }
  1228.                               // Prendre si extension reconnue
  1229.                               if (!url_ok) {
  1230.                                 get_httptype(type,tempo,0);
  1231.                                 if (strnotempty(type))     // type reconnu!
  1232.                                   url_ok=1;
  1233.                                 else if (is_dyntype(get_ext(tempo)))  // reconnu php,cgi,asp..
  1234.                                   url_ok=1;
  1235.                                 // MAIS pas les foobar@aol.com !!
  1236.                                 if (strchr(tempo,'@'))
  1237.                                   url_ok=0;
  1238.                               }
  1239.                               //
  1240.                               // Ok, cela pourrait Ωtre une URL
  1241.                               if (url_ok) {
  1242.                                 
  1243.                                 // Check if not fodbidden tag (id,name..)
  1244.                                 if (intag_start_valid) {
  1245.                                   if (intag_start)
  1246.                                     if (intag_startattr)
  1247.                                       if (intag)
  1248.                                         if (!inscript)
  1249.                                           if (!incomment) {
  1250.                                             int i=0,nop=0;
  1251.                                             while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
  1252.                                               nop=rech_tageq(intag_startattr,hts_nodetect[i]);
  1253.                                               i++;
  1254.                                             }
  1255.                                             // Forbidden tag
  1256.                                             if (nop) {
  1257.                                               url_ok=0;
  1258.                                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1259.                                                 fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
  1260.                                               }
  1261.                                             }
  1262.                                           }
  1263.                                 }
  1264.                                 
  1265.                                 
  1266.                                 // Accepter URL, on la traitera comme une URL normale!!
  1267.                                 if (url_ok)
  1268.                                   p=1;
  1269.                                 
  1270.                               }
  1271.                             }
  1272.                           }
  1273.                           }
  1274.                         }
  1275.                       }
  1276.                     }
  1277.                   }
  1278.                 }  // p == 0
  1279.                 
  1280.                 // plus dans un commentaire
  1281.                 if (!incomment_justquit)
  1282.                   parseall_lastc=*adr;             // caractΦre avant le prochain
  1283.                 
  1284.               } // not in comment
  1285.               
  1286.             }  // if realspace
  1287.           }  // if parseall
  1288.           
  1289.           
  1290.           // ------------------------------------------------------------
  1291.           // p!=0 : on a repéré un éventuel lien
  1292.           // ------------------------------------------------------------
  1293.           //
  1294.           if ((p>0) || (valid_p)) {    // on a repΘrΘ un lien
  1295.             //int lien_valide=0;
  1296.             char* eadr=NULL;          /* fin de l'URL */
  1297.             char* quote_adr=NULL;     /* adresse du ? dans l'adresse */
  1298.             int ok=1;
  1299.             char quote='\0';
  1300.             
  1301.             // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
  1302.             if ((nofollow) || (opt->state.stop))
  1303.               p_nocatch=1;
  1304.             
  1305.             // écrire codebase avant, flusher avant code
  1306.             if ((p_type==-1) || (p_type==-2)) {
  1307.               if ((opt->getmode & 1) && (ptr>0)) {
  1308.                 HT_ADD_ADR;    // refresh
  1309.               }
  1310.               lastsaved=adr;    // dernier Θcrit+1
  1311.             }
  1312.             
  1313.             // sauter espaces
  1314.             adr+=p;
  1315.             while((is_space(*adr)) && (quote=='\0')) {
  1316.               if (!quote)
  1317.                 if ((*adr=='\"') || (*adr=='\''))
  1318.                   quote=*adr;                     // on doit attendre cela α la fin
  1319.                 // puis quitter
  1320.                 adr++;    // sauter les espaces, "" et cie
  1321.             }
  1322.             
  1323.             /* Stop at \n (LF) if primary links*/
  1324.             if (ptr == 0)
  1325.               quote='\n';
  1326.             /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
  1327.             else if (inscript)
  1328.               quote='\0';
  1329.             
  1330.             // sauter éventuel \" ou \' javascript
  1331.             if (inscript) {    // on est dans un obj.write("..
  1332.               if (*adr=='\\') {
  1333.                 if ((*(adr+1)=='\'') || (*(adr+1)=='"')) {  // \" ou \'
  1334.                   adr+=2;    // sauter
  1335.                 }
  1336.               }
  1337.             }
  1338.             
  1339.             // sauter content="1;URL=http://..
  1340.             if (p_searchMETAURL) {
  1341.               int l=0;
  1342.               while(
  1343.                 (adr + l + 4 < r->adr + r->size)
  1344.                 && (!strfield(adr+l,"URL=")) 
  1345.                 && (l<128) ) l++;
  1346.               if (!strfield(adr+l,"URL="))
  1347.                 ok=-1;
  1348.               else
  1349.                 adr+=(l+4);
  1350.             }
  1351.             
  1352.             /* éviter les javascript:document.location=.. : les parser, plutôt */
  1353.             if (ok!=-1) {
  1354.               if (strfield(adr,"javascript:")) {
  1355.                 ok=-1;
  1356.                 /*
  1357.                 On est désormais dans du code javascript
  1358.                 */
  1359.                 inscript_tag=inscript=1;
  1360.                 inscript_tag_lastc=quote;     /* α attendre α la fin */
  1361.               }
  1362.             }
  1363.             
  1364.             if (p_type==1) {
  1365.               if (*adr=='#') {
  1366.                 adr++;           // sauter # pour usemap etc
  1367.               }
  1368.             }
  1369.             eadr=adr;
  1370.             
  1371.             // ne pas flusher après code si on doit écrire le codebase avant!
  1372.             if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
  1373.               if ((opt->getmode & 1) && (ptr>0)) {
  1374.                 HT_ADD_ADR;    // refresh
  1375.               }
  1376.               lastsaved=adr;    // dernier Θcrit+1
  1377.               // après on écrira soit les données initiales,
  1378.               // soit une URL/lien modifié!
  1379.             } else if (p_type==-1) p_flush=adr;    // flusher jusqu'α adr ensuite
  1380.             
  1381.             if (ok!=-1) {    // continuer
  1382.               // découper le lien
  1383.               do {
  1384.                 if ((* (unsigned char*) eadr)<32) {   // caractΦre de contr⌠le (ou \0)
  1385.                   if (!is_space(*eadr))
  1386.                     ok=0; 
  1387.                 }
  1388.                 if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE)  // ** trop long, >HTS_URLMAXSIZE caractΦres (on prΘvoit HTS_URLMAXSIZE autres pour path)
  1389.                   ok=-1;    // ne pas traiter ce lien
  1390.                 
  1391.                 if (ok > 0) {
  1392.                   //if (*eadr!=' ') {  
  1393.                   if (is_space(*eadr)) {   // guillemets,CR, etc
  1394.                     if ((!quote) || (*eadr==quote))     // si pas d'attente de quote spΘciale ou si quote atteinte
  1395.                       ok=0; 
  1396.                   } else if (ending_p && (*eadr==ending_p))
  1397.                     ok=0;
  1398.                   else {
  1399.                     switch(*eadr) {
  1400.                     case '>': 
  1401.                       if (!quote) {
  1402.                         if (!inscript) {
  1403.                           intag=0;    // PLUS dans un tag!
  1404.                           intag_start_valid=0;
  1405.                         }
  1406.                         ok=0;
  1407.                       }
  1408.                       break;
  1409.                       /*case '<':*/ 
  1410.                     case '#': 
  1411.                       if (*(eadr-1) != '&')       // (
  1412.                         ok=0; 
  1413.                       break;
  1414.                       // case '?': non!
  1415.                     case '\\': if (inscript) ok=0; break;     // \" ou \' point d'arrΩt
  1416.                     case '?': quote_adr=adr; break;           // noter position query
  1417.                     }
  1418.                   }
  1419.                   //}
  1420.                 } 
  1421.                 eadr++;
  1422.               } while(ok==1);     
  1423.               
  1424.               // Empty link detected
  1425.               if ( (((int) (eadr - adr))) <= 1) {       // link empty
  1426.                 ok=-1;        // No
  1427.                 if (*adr != '#') {        // Not empty+unique #
  1428.                   if ( (((int) (eadr - adr)) == 1)) {       // 1=link empty with delim (end_adr-start_adr)
  1429.                     if (quote) {
  1430.                       if ((opt->getmode & 1) && (ptr>0)) { 
  1431.                         HT_ADD("#");        // We add this for a <href="">
  1432.                       }
  1433.                     }
  1434.                   }
  1435.                 }
  1436.               }
  1437.               
  1438.             }
  1439.             
  1440.             if (ok==0) {    // tester un lien
  1441.               char lien[HTS_URLMAXSIZE*2];
  1442.               int meme_adresse=0;      // 0 par dΘfaut pour primary
  1443.               //char *copie_de_adr=adr;
  1444.               //char* p;
  1445.               
  1446.               // construire lien (découpage)
  1447.               if ( (((int) (eadr -  adr))-1) < HTS_URLMAXSIZE  ) {    // pas trop long?
  1448.                 strncpy(lien,adr,((int) (eadr - adr))-1);
  1449.                 *(lien+  (((int) (eadr -  adr)))-1  )='\0';
  1450.                 //printf("link: %s\n",lien);          
  1451.                 // supprimer les espaces
  1452.                 while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
  1453.                 
  1454.                 
  1455. #if HTS_STRIP_DOUBLE_SLASH
  1456.                 // supprimer les // en / (sauf pour http://)
  1457.                 {
  1458.                   char *a,*p,*q;
  1459.                   int done=0;
  1460.                   a=strchr(lien,':');    // http://
  1461.                   if (a) {
  1462.                     a++;
  1463.                     while(*a=='/') a++;    // position aprΦs http://
  1464.                   } else {
  1465.                     a=lien;                // dΘbut
  1466.                     while(*a=='/') a++;    // position aprΦs http://
  1467.                   }
  1468.                   q=strchr(a,'?');     // ne pas traiter aprΦs '?'
  1469.                   if (!q)
  1470.                     q=a+strlen(a)-1;
  1471.                   while(( p=strstr(a,"//")) && (!done) ) {    // remplacer // par /
  1472.                     if ((int) p>(int) q) {   // aprΦs le ? (toto.cgi?param=1//2.3)
  1473.                       done=1;    // stopper
  1474.                     } else {
  1475.                       char tempo[HTS_URLMAXSIZE*2];
  1476.                       tempo[0]='\0';
  1477.                       strncatbuff(tempo,a,(int) p - (int) a);
  1478.                       strcatbuff (tempo,p+1);
  1479.                       strcpybuff(a,tempo);    // recopier
  1480.                     }
  1481.                   }
  1482.                 }
  1483. #endif
  1484.                 
  1485.               } else
  1486.                 lien[0]='\0';    // erreur
  1487.               
  1488.               // ------------------------------------------------------
  1489.               // Lien repéré et extrait
  1490.               if (strnotempty(lien)>0) {           // construction du lien
  1491.                 char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2];          // ATTENTION adr cache le "vrai" adr
  1492.                 int forbidden_url=-1;              // lien non interdit (mais non autorisΘ..)
  1493.                 int just_test_it=0;                // mode de test des liens
  1494.                 int set_prio_to=0;                 // pour capture de page isolΘe
  1495.                 int import_done=0;                 // lien importΘ (ne pas scanner ensuite *α priori*)
  1496.                 //
  1497.                 adr[0]='\0'; fil[0]='\0';
  1498.                 //
  1499.                 // 0: autorisé
  1500.                 // 1: interdit (patcher tout de même adresse)
  1501.                 
  1502.                 if ((opt->debug>1) && (opt->log!=NULL)) {
  1503.                   fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
  1504.                 }
  1505.                 
  1506.                 // external check
  1507. #if HTS_ANALYSTE
  1508.                 if (!hts_htmlcheck_linkdetected(lien)) {
  1509.                   error=1;    // erreur
  1510.                   if (opt->errlog) {
  1511.                     fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
  1512.                     test_flush;
  1513.                   }
  1514.                 }
  1515. #endif
  1516.                 
  1517.                 // purger espaces de début et fin, CR,LF résiduels
  1518.                 // (IMG SRC="foo.<\n><\t>gif<\t>")
  1519.                 {
  1520.                   char* a = lien;
  1521.                   int llen;
  1522.  
  1523.                   // strip ending spaces
  1524.                   llen = ( *a != '\0' ) ? strlen(a) : 0;
  1525.                   while(llen > 0 && is_realspace(lien[llen - 1]) ) {
  1526.                     a[--llen]='\0';
  1527.                   } 
  1528.                   //  skip leading ones
  1529.                   while(is_realspace(*a)) a++;
  1530.                   // strip cr, lf, tab inside URL
  1531.                   llen = 0;
  1532.                   while(*a) {
  1533.                     if (*a != '\n' && *a != '\r' && *a != '\t') {
  1534.                       lien[llen++] = *a;
  1535.                     }
  1536.                     a++;
  1537.                   }
  1538.                   lien[llen] = '\0';
  1539.                 }
  1540.                 
  1541.                 /* Unescape/escape %20 and other &nbsp; */
  1542.                 {
  1543.                   char query[HTS_URLMAXSIZE*2];
  1544.                   char* a=strchr(lien,'?');
  1545.                   if (a) {
  1546.                     strcpybuff(query,a);
  1547.                     *a='\0';
  1548.                   } else
  1549.                     query[0]='\0';
  1550.                   // conversion &amp; -> & et autres joyeusetés
  1551.                   unescape_amp(lien);
  1552.                   unescape_amp(query);
  1553.                   // décoder l'inutile (%2E par exemple) et coder espaces
  1554.                   // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
  1555.                   strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
  1556.                   escape_remove_control(lien);
  1557.                   escape_spc_url(lien);
  1558.                   strcatbuff(lien,query);     /* restore */
  1559.                 }
  1560.                 
  1561.                 // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
  1562.                 {
  1563.                   char* a=jump_identification(lien);
  1564.                   while( (a=strchr(a,'\\')) ) *a='/';
  1565.                 }
  1566.                 
  1567.                 // supprimer le(s) ./
  1568.                 while ((lien[0]=='.') && (lien[1]=='/')) {
  1569.                   char tempo[HTS_URLMAXSIZE*2];
  1570.                   strcpybuff(tempo,lien+2);
  1571.                   strcpybuff(lien,tempo);
  1572.                 }
  1573.                 if (strnotempty(lien)==0)  // sauf si plus de nom de fichier
  1574.                   strcpybuff(lien,"./");
  1575.                 
  1576.                 // vérifie les /~machin -> /~machin/
  1577.                 // supposition dangereuse?
  1578.                 // OUI!!
  1579. #if HTS_TILDE_SLASH
  1580.                 if (lien[strlen(lien)-1]!='/') {
  1581.                   char *a=lien+strlen(lien)-1;
  1582.                   // éviter aussi index~1.html
  1583.                   while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
  1584.                   if (*a=='~') {
  1585.                     strcatbuff(lien,"/");    // ajouter slash
  1586.                   }
  1587.                 }
  1588. #endif
  1589.                 
  1590.                 // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
  1591.                 // yes, this is dirty
  1592.                 // but I'm so lazy..
  1593.                 // and besides the java "code" convention is really a pain in html code
  1594.                 if (p_type==-1) {
  1595.                   char* a=strrchr(lien,'.');
  1596.                   add_class_dots_to_patch=0;
  1597.                   if (a) {
  1598.                     char* b;
  1599.                     do {
  1600.                       b=strchr(lien,'.');
  1601.                       if ((b != a) && (b)) {
  1602.                         add_class_dots_to_patch++;
  1603.                         *b='/';
  1604.                       }
  1605.                     } while((b != a) && (b));
  1606.                   }
  1607.                 }
  1608.                 
  1609.                 // éliminer les éventuels :80 (port par défaut!)
  1610.                 if (link_has_authority(lien)) {
  1611.                   char * a;
  1612.                   a=strstr(lien,"//");    // "//" authority
  1613.                   if (a)
  1614.                     a+=2;
  1615.                   else
  1616.                     a=lien;
  1617.                   // while((*a) && (*a!='/') && (*a!=':')) a++;
  1618.                   a=jump_toport(a);
  1619.                   if (a) {  // port
  1620.                     int port=0;
  1621.                     int defport=80;
  1622.                     char* b=a+1;
  1623. #if HTS_USEOPENSSL
  1624.                     // FIXME
  1625.                     //if (strfield(adr, "https:")) {
  1626.                     //}
  1627. #endif
  1628.                     while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
  1629.                     if (port==defport) {  // port 80, default - c'est dΘbile
  1630.                       char tempo[HTS_URLMAXSIZE*2];
  1631.                       tempo[0]='\0';
  1632.                       strncatbuff(tempo,lien,(int) (a - lien));
  1633.                       strcatbuff(tempo,a+3);  // sauter :80
  1634.                       strcpybuff(lien,tempo);
  1635.                     }
  1636.                   }
  1637.                 }
  1638.                 
  1639.                 // filtrer les parazites (mailto & cie)
  1640.                 /*
  1641.                 if (strfield(lien,"mailto:")) {  // ne pas traiter
  1642.                 error=1;
  1643.                 } else if (strfield(lien,"news:")) {  // ne pas traiter
  1644.                 error=1;
  1645.                 }
  1646.                 */
  1647.                 
  1648.                 // vérifier que l'on ne doit pas ajouter de .class
  1649.                 if (!error) {
  1650.                   if (add_class) {
  1651.                     char *a = lien+strlen(lien)-1;
  1652.                     while(( a > lien) && (*a!='/') && (*a!='.')) a--;
  1653.                     if (*a != '.')
  1654.                       strcatbuff(lien,".class");    // ajouter .class
  1655.                     else if (!strfield2(a,".class"))
  1656.                       strcatbuff(lien,".class");    // idem
  1657.                   }
  1658.                 }
  1659.                 
  1660.                 // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
  1661.                 if (!error) {
  1662.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1663.                     fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
  1664.                   }
  1665.                   
  1666.                   if ((p_type==2) || (p_type==-2)) {   // code ou codebase                        
  1667.                     // Vérifier les codebase=applet (au lieu de applet/)
  1668.                     if (p_type==-2) {    // codebase
  1669.                       if (strnotempty(lien)) {
  1670.                         if (fil[strlen(lien)-1]!='/') {  // pas rΘpertoire
  1671.                           strcatbuff(lien,"/");
  1672.                         }
  1673.                       }
  1674.                     }
  1675.                     /* only one ending / (bug on some pages) */
  1676.                     if ((int)strlen(lien)>2) {
  1677.                       while( (lien[strlen(lien)-2]=='/') && ((int)strlen(lien)>2) )    /* double // (bug) */
  1678.                         lien[strlen(lien)-1]='\0';
  1679.                     }
  1680.                     // copier nom host si besoin est
  1681.                     if (!link_has_authority(lien)) {  // pas de http://
  1682.                       char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2];  // ** euh ident_url_relatif??
  1683.                       if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {                        
  1684.                         error=1;
  1685.                       } else {
  1686.                         strcpybuff(lien,"http://");
  1687.                         strcatbuff(lien,adr2);
  1688.                         if (*fil2!='/')
  1689.                           strcatbuff(lien,"/");
  1690.                         strcatbuff(lien,fil2);
  1691.                         {
  1692.                           char* a;
  1693.                           a=lien+strlen(lien)-1;
  1694.                           while((*a) && (*a!='/') && ( a> lien)) a--;
  1695.                           if (*a=='/') {
  1696.                             *(a+1)='\0';
  1697.                           }
  1698.                         }
  1699.                         //char tempo[HTS_URLMAXSIZE*2];
  1700.                         //strcpybuff(tempo,"http://");
  1701.                         //strcatbuff(tempo,urladr);    // host
  1702.                         //if (*lien!='/')
  1703.                         //  strcatbuff(tempo,"/");
  1704.                         //strcatbuff(tempo,lien);
  1705.                         //strcpybuff(lien,tempo);
  1706.                       }
  1707.                     }
  1708.                     
  1709.                     if (!error) {  // pas d'erreur?
  1710.                       if (p_type==2) {   // code ET PAS codebase      
  1711.                         char* a=lien+strlen(lien)-1;
  1712.                         while( (a > lien) && (*a) && (*a!='/')) a--;
  1713.                         if (*a=='/')     // ok on a repΘrΘ le dernier /
  1714.                           *(a+1)='\0';   // couper
  1715.                         else {
  1716.                           *lien='\0';    // Θliminer
  1717.                           error=1;   // erreur, ne pas poursuivre
  1718.                         }      
  1719.                       }
  1720.                       
  1721.                       // stocker base ou codebase?
  1722.                       switch(p_type) {
  1723.                       case 2: { 
  1724.                         //if (*lien!='/') strcatbuff(base,"/");
  1725.                         strcpybuff(base,lien);
  1726.                               }
  1727.                         break;      // base
  1728.                       case -2: {
  1729.                         //if (*lien!='/') strcatbuff(codebase,"/");
  1730.                         strcpybuff(codebase,lien); 
  1731.                                }
  1732.                         break;  // base
  1733.                       }
  1734.                       
  1735.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  1736.                         fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
  1737.                       }
  1738.                       //printf("base code: %s - %s\n",lien,base);
  1739.                     }
  1740.                     
  1741.                   } else {
  1742.                     char* _base;
  1743.                     if (p_type==-1)   // code (applet)
  1744.                       _base=codebase;
  1745.                     else
  1746.                       _base=base;
  1747.                     
  1748.                     
  1749.                     // ajouter chemin de base href..
  1750.                     if (strnotempty(_base)) {       // considΘrer base
  1751.                       if (!link_has_authority(lien)) {    // non absolue
  1752.                         if (*lien!='/') {           // non absolu sur le site (/)
  1753.                           if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
  1754.                             // mailto: and co: do NOT add base
  1755.                             if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
  1756.                               char tempo[HTS_URLMAXSIZE*2];
  1757.                               // base est absolue
  1758.                               strcpybuff(tempo,_base);
  1759.                               strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
  1760.                               strcpybuff(lien,tempo);        // patcher en considΘrant base
  1761.                               // ** vΘrifier que ../ fonctionne (ne doit pas arriver mais bon..)
  1762.                               
  1763.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1764.                                 fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  1765.                               }
  1766.                             }
  1767.                           } else {
  1768.                             error=1;    // erreur
  1769.                             if (opt->errlog) {
  1770.                               fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  1771.                               test_flush;
  1772.                             }
  1773.                           }
  1774.                         } else {
  1775.                           char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
  1776.                           if (ident_url_absolute(_base, badr, bfil) >=0 ) {
  1777.                             if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
  1778.                               char tempo[HTS_URLMAXSIZE*2];
  1779.                               // base est absolue
  1780.                               tempo[0] = '\0';
  1781.                               if (!link_has_authority(badr)) {
  1782.                                 strcatbuff(tempo, "http://");
  1783.                               }
  1784.                               strcatbuff(tempo,badr);
  1785.                               strcatbuff(tempo,lien);
  1786.                               strcpybuff(lien,tempo);        // patcher en considΘrant base
  1787.                               
  1788.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1789.                                 fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  1790.                               }
  1791.                             } else {
  1792.                               error=1;    // erreur
  1793.                               if (opt->errlog) {
  1794.                                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  1795.                                 test_flush;
  1796.                               }
  1797.                             }
  1798.                           }
  1799.                         }
  1800.                       }
  1801.                     }
  1802.                     
  1803.                     
  1804.                   }
  1805.                 }
  1806.                 
  1807.                 
  1808.                 // transformer lien quelconque (http, relatif, etc) en une adresse
  1809.                 // et un chemin+fichier (adr,fil)
  1810.                 if (!error) {
  1811.                   int reponse;
  1812.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1813.                     fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,urladr,urlfil); test_flush;
  1814.                   }
  1815.                   if ((reponse=ident_url_relatif(lien,urladr,urlfil,adr,fil))<0) {                        
  1816.                     adr[0]='\0';    // erreur
  1817.                     if (reponse==-2) {
  1818.                       if (opt->errlog) {
  1819.                         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown ftp:// protocol)"LF,lien);
  1820.                         test_flush;
  1821.                       }
  1822.                     } else {
  1823.                       if ((opt->debug>1) && (opt->errlog!=NULL)) {
  1824.                         fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,urladr,urlfil); test_flush;
  1825.                       }
  1826.                     }
  1827.                   }
  1828.                 } else {
  1829.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1830.                     fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
  1831.                   }
  1832.                   adr[0]='\0';
  1833.                 }
  1834.                 
  1835. #if HTS_CHECK_STRANGEDIR
  1836.                 // !ATTENTION!
  1837.                 // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
  1838.                 // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
  1839.                 // et un fichier en http A PRIORI : je fais donc un test
  1840.                 // En cas de moved xxx, on recalcule adr et fil, tout simplement
  1841.                 // DEFAUT: test effectué plusieurs fois! à revoir!!!
  1842.                 if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
  1843.                   //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
  1844.                   if (fil[strlen(fil)-1]!='/') {  // pas rΘpertoire
  1845.                     if (ishtml(fil)==-2) {    // pas d'extension
  1846.                       char loc[HTS_URLMAXSIZE*2];  // Θventuelle nouvelle position
  1847.                       loc[0]='\0';
  1848.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  1849.                         fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
  1850.                         test_flush;
  1851.                       }
  1852.                       
  1853.                       // tester Θventuelle nouvelle position
  1854.                       switch (http_location(adr,fil,loc).statuscode) {
  1855.                       case 200: // ok au final
  1856.                         if (strnotempty(loc)) {  // a changΘ d'adresse
  1857.                           if (opt->errlog) {
  1858.                             fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
  1859.                             test_flush;
  1860.                           }
  1861.                           
  1862.                           // recalculer adr et fil!
  1863.                           if (ident_url_absolute(loc,adr,fil)==-1) {
  1864.                             adr[0]='\0';  // cancel
  1865.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  1866.                               fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil);
  1867.                               test_flush;
  1868.                             }
  1869.                           }
  1870.                           
  1871.                         }
  1872.                         break;
  1873.                       case -2: case -3:  // timeout ou erreur grave
  1874.                         if (opt->errlog) {
  1875.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
  1876.                           test_flush;
  1877.                         }
  1878.                         
  1879.                         break;
  1880.                       }
  1881.                       
  1882.                     }
  1883.                   } 
  1884.                 }
  1885. #endif
  1886.                 
  1887.                 // Le lien doit juste être réécrit, mais ne doit pas générer un lien
  1888.                 // exemple: <FORM ACTION="url_cgi">
  1889.                 if (p_nocatch) {
  1890.                   forbidden_url=1;    // interdire rΘcupΘration du lien
  1891.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1892.                     fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
  1893.                     test_flush;
  1894.                   }
  1895.                 }
  1896.                 
  1897.                 // Tester si un lien doit être accepté ou refusé (wizard)
  1898.                 // forbidden_url=1 : lien refusé
  1899.                 // forbidden_url=0 : lien accepté
  1900.                 //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  1901.                 if ((p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  1902.                   if (!p_nocatch) {
  1903.                     if (adr[0]!='\0') {          
  1904.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  1905.                         fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
  1906.                         test_flush;
  1907.                       }
  1908.                       forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  1909.                         adr,fil,
  1910.                         &filters,&filptr,opt->maxfilter,
  1911.                         robots,
  1912.                         &set_prio_to,
  1913.                         &just_test_it);
  1914.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  1915.                         fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
  1916.                         test_flush;
  1917.                       }
  1918.                     }
  1919.                   }
  1920.                 }
  1921.                 
  1922.                 // calculer meme_adresse
  1923.                 meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
  1924.                 
  1925.                 
  1926.                 
  1927.                 // Début partie sauvegarde
  1928.                 
  1929.                 // ici on forme le nom du fichier à sauver, et on patche l'URL
  1930.                 if (adr[0]!='\0') {
  1931.                   // savename: simplifier les ../ et autres joyeusetés
  1932.                   char save[HTS_URLMAXSIZE*2];
  1933.                   int r_sv=0;
  1934.                   // En cas de moved, adresse première
  1935.                   char former_adr[HTS_URLMAXSIZE*2];
  1936.                   char former_fil[HTS_URLMAXSIZE*2];
  1937.                   //
  1938.                   save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
  1939.                   //
  1940.                   
  1941.                   // nom du chemin à sauver si on doit le calculer
  1942.                   // note: url_savename peut décider de tester le lien si il le trouve
  1943.                   // suspect, et modifier alors adr et fil
  1944.                   // dans ce cas on aura une référence directe au lieu des traditionnels
  1945.                   // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
  1946.                   // gif sont impliqués par exemple)
  1947.                   if ((p_type!=2) && (p_type!=-2)) {  // pas base href ou codebase
  1948.                     if (forbidden_url!=1) {
  1949.                       char last_adr[HTS_URLMAXSIZE*2];
  1950.                       last_adr[0]='\0';
  1951.                       //char last_fil[HTS_URLMAXSIZE*2]="";
  1952.                       strcpybuff(last_adr,adr);    // ancienne adresse
  1953.                       //strcpybuff(last_fil,fil);    // ancien chemin
  1954.                       r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
  1955.                       if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) {  // a changΘ
  1956.                         
  1957.                         // 2e test si moved
  1958.                         
  1959.                         // Tester si un lien doit être accepté ou refusé (wizard)
  1960.                         // forbidden_url=1 : lien refusé
  1961.                         // forbidden_url=0 : lien accepté
  1962.                         if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  1963.                           if (!p_nocatch) {
  1964.                             if (adr[0]!='\0') {          
  1965.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1966.                                 fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
  1967.                                 test_flush;
  1968.                               }
  1969.                               forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  1970.                                 adr,fil,
  1971.                                 &filters,&filptr,opt->maxfilter,
  1972.                                 robots,
  1973.                                 &set_prio_to,
  1974.                                 &just_test_it);
  1975.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1976.                                 fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
  1977.                                 test_flush;
  1978.                               }
  1979.                             }
  1980.                           }
  1981.                         }
  1982.                         
  1983.                         //import_done=1;    // c'est un import!
  1984.                         meme_adresse=0;   // on a changΘ
  1985.                       }
  1986.                     } else {
  1987.                       strcpybuff(save,"");  // dummy
  1988.                     }
  1989.                   }
  1990.                   if (r_sv!=-1) {  // pas d'erreur, on continue
  1991.                     /* log */
  1992.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  1993.                       fspc(opt->log,"debug");
  1994.                       if (forbidden_url!=1) {    // le lien va Ωtre chargΘ
  1995.                         if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, pas un lien
  1996.                           fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
  1997.                         } else if ((opt->getmode & 4)==0) {
  1998.                           fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  1999.                         } else {
  2000.                           if (!ishtml(fil))
  2001.                             fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
  2002.                           else
  2003.                             fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  2004.                         } 
  2005.                       } else
  2006.                         fprintf(opt->log,"External: %s%s"LF,adr,fil);
  2007.                       test_flush;
  2008.                     }
  2009.                     /* FIN log */
  2010.                     
  2011.                     // écrire lien
  2012.                     if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, sauter
  2013.                       lastsaved=eadr-1+1;  // sauter "
  2014.                     }
  2015.                     /* */
  2016.                     else if (opt->urlmode==0) {    // URL absolue dans tous les cas
  2017.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2018.                         if (!link_has_authority(adr)) {
  2019.                           HT_ADD("http://");
  2020.                         } else {
  2021.                           char* aut = strstr(adr, "//");
  2022.                           if (aut) {
  2023.                             char tmp[256];
  2024.                             tmp[0]='\0';
  2025.                             strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2026.                             HT_ADD(tmp);          // Protocol
  2027.                             HT_ADD("//");
  2028.                           }
  2029.                         }
  2030.                         
  2031.                         if (!opt->passprivacy) {
  2032.                           HT_ADD(jump_protocol(adr));           // Password
  2033.                         } else {
  2034.                           HT_ADD(jump_identification(adr));     // No Password
  2035.                         }
  2036.                         if (*fil!='/')
  2037.                           HT_ADD("/");
  2038.                         HT_ADD(fil);
  2039.                       }
  2040.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2041.                       /* */
  2042.                     } else if (opt->urlmode >= 4) {    // ne rien faire dans tous les cas!
  2043.                       /* */
  2044.                       /* leave the link 'as is' */
  2045.                       /* Sinon, dΘpend de interne/externe */
  2046.                     } else if (forbidden_url==1) {    // le lien ne sera pas chargΘ, rΘfΘrence externe!
  2047.                       if ((opt->getmode & 1) && (ptr>0)) {
  2048.                         if (p_type!=-1) {     // pas que le nom de fichier (pas classe java)
  2049.                           if (!opt->external) {
  2050.                             if (!link_has_authority(adr)) {
  2051.                               HT_ADD("http://");
  2052.                               if (!opt->passprivacy) {
  2053.                                 HT_ADD(adr);     // Password
  2054.                               } else {
  2055.                                 HT_ADD(jump_identification(adr));     // No Password
  2056.                               }
  2057.                               if (*fil!='/')
  2058.                                 HT_ADD("/");
  2059.                               HT_ADD(fil);
  2060.                             } else {
  2061.                               char* aut = strstr(adr, "//");
  2062.                               if (aut) {
  2063.                                 char tmp[256];
  2064.                                 tmp[0]='\0';
  2065.                                 strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2066.                                 HT_ADD(tmp);          // Protocol
  2067.                                 HT_ADD("//");
  2068.                                 if (!opt->passprivacy) {
  2069.                                   HT_ADD(jump_protocol(adr));          // Password
  2070.                                 } else {
  2071.                                   HT_ADD(jump_identification(adr));     // No Password
  2072.                                 }
  2073.                                 if (*fil!='/')
  2074.                                   HT_ADD("/");
  2075.                                 HT_ADD(fil);
  2076.                               }
  2077.                             }
  2078.                             //
  2079.                           } else {    // fichier/page externe, mais on veut gΘnΘrer une erreur
  2080.                             //
  2081.                             int patch_it=0;
  2082.                             int add_url=0;
  2083.                             char* cat_name=NULL;
  2084.                             char* cat_data=NULL;
  2085.                             int cat_nb=0;
  2086.                             int cat_data_len=0;
  2087.                             
  2088.                             // ajouter lien external
  2089.                             switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil))  ) ) {
  2090.                             case 1: case -2:       // html ou rΘpertoire
  2091.                               if (opt->getmode & 1) {  // sauver html
  2092.                                 patch_it=1;   // redirect
  2093.                                 add_url=1;    // avec link?
  2094.                                 cat_name="external.html";
  2095.                                 cat_nb=0;
  2096.                                 cat_data=HTS_DATA_UNKNOWN_HTML;
  2097.                                 cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2098.                               }
  2099.                               break;
  2100.                             default:    // inconnu
  2101.                               // asp, cgi..
  2102.                               if (is_dyntype(get_ext(fil))) {
  2103.                                 patch_it=1;   // redirect
  2104.                                 add_url=1;    // avec link?
  2105.                                 cat_name="external.html";
  2106.                                 cat_nb=0;
  2107.                                 cat_data=HTS_DATA_UNKNOWN_HTML;
  2108.                                 cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2109.                               } else if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) 
  2110.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) 
  2111.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) 
  2112.                                 || (ishtml(fil)!=0) ) {
  2113.                                 patch_it=1;   // redirect
  2114.                                 add_url=1;    // avec link aussi
  2115.                                 cat_name="external.gif";
  2116.                                 cat_nb=1;
  2117.                                 cat_data=HTS_DATA_UNKNOWN_GIF;
  2118.                                 cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
  2119.                               }
  2120.                               break;
  2121.                             }// html,gif
  2122.                             
  2123.                             if (patch_it) {
  2124.                               char save[HTS_URLMAXSIZE*2];
  2125.                               char tempo[HTS_URLMAXSIZE*2];
  2126.                               strcpybuff(save,opt->path_html);
  2127.                               strcatbuff(save,cat_name);
  2128.                               if (lienrelatif(tempo,save,savename)==0) {
  2129.                                 if (!no_esc_utf)
  2130.                                   escape_uri(tempo);     // escape with %xx
  2131.                                 else
  2132.                                   escape_uri_utf(tempo);     // escape with %xx
  2133.                                 HT_ADD(tempo);    // page externe
  2134.                                 if (add_url) {
  2135.                                   HT_ADD("?link=");    // page externe
  2136.                                   
  2137.                                   // same as above
  2138.                                   if (!link_has_authority(adr)) {
  2139.                                     HT_ADD("http://");
  2140.                                     if (!opt->passprivacy) {
  2141.                                       HT_ADD(adr);     // Password
  2142.                                     } else {
  2143.                                       HT_ADD(jump_identification(adr));     // No Password
  2144.                                     }
  2145.                                     if (*fil!='/')
  2146.                                       HT_ADD("/");
  2147.                                     HT_ADD(fil);
  2148.                                   } else {
  2149.                                     char* aut = strstr(adr, "//");
  2150.                                     if (aut) {
  2151.                                       char tmp[256];
  2152.                                       tmp[0]='\0';
  2153.                                       strncatbuff(tmp, adr, (int) (aut - adr) + 2);   // scheme
  2154.                                       HT_ADD(tmp);
  2155.                                       if (!opt->passprivacy) {
  2156.                                         HT_ADD(jump_protocol(adr));          // Password
  2157.                                       } else {
  2158.                                         HT_ADD(jump_identification(adr));     // No Password
  2159.                                       }
  2160.                                       if (*fil!='/')
  2161.                                         HT_ADD("/");
  2162.                                       HT_ADD(fil);
  2163.                                     }
  2164.                                   }
  2165.                                   //
  2166.                                   
  2167.                                 }
  2168.                               }
  2169.                               
  2170.                               // écrire fichier?
  2171.                               if (verif_external(cat_nb,1)) {
  2172.                                 //if (!fexist(fconcat(opt->path_html,cat_name))) {
  2173.                                 FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
  2174.                                 if (fp) {
  2175.                                   if (cat_data_len==0) {   // texte
  2176.                                     verif_backblue(opt->path_html);
  2177.                                     fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
  2178.                                   } else {                    // data
  2179.                                     fwrite(cat_data,cat_data_len,1,fp);
  2180.                                   }
  2181.                                   fclose(fp);
  2182.                                   usercommand(0,NULL,fconcat(opt->path_html,cat_name));
  2183.                                 }
  2184.                               }
  2185.                             }  else {    // Θcrire normalement le nom de fichier
  2186.                               HT_ADD("http://");
  2187.                               if (!opt->passprivacy) {
  2188.                                 HT_ADD(adr);       // Password
  2189.                               } else {
  2190.                                 HT_ADD(jump_identification(adr));       // No Password
  2191.                               }
  2192.                               if (*fil!='/')
  2193.                                 HT_ADD("/");
  2194.                               HT_ADD(fil);
  2195.                             }// patcher?
  2196.                             }  // external
  2197.                           } else {  // que le nom de fichier (classe java)
  2198.                             // en gros recopie de plus bas: copier codebase et base
  2199.                             if (p_flush) {
  2200.                               char tempo[HTS_URLMAXSIZE*2];    // <-- ajoutΘ
  2201.                               char tempo_pat[HTS_URLMAXSIZE*2];
  2202.                               
  2203.                               // Calculer chemin
  2204.                               tempo_pat[0]='\0';
  2205.                               strcpybuff(tempo,fil);  // <-- ajoutΘ
  2206.                               {
  2207.                                 char* a=strrchr(tempo,'/');
  2208.                                 
  2209.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2210.                                 // we have to do the contrary now
  2211.                                 if (add_class_dots_to_patch>0) {
  2212.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2213.                                     *a='.';     // convert "false" java / into .
  2214.                                     add_class_dots_to_patch--;
  2215.                                     a=strrchr(tempo,'/');
  2216.                                   }
  2217.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2218.                                   if (add_class_dots_to_patch) {
  2219.                                     if (opt->errlog) {
  2220.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2221.                                       test_flush;
  2222.                                     }
  2223.                                   }
  2224.                                 }
  2225.                                 
  2226.                                 // Cut path/filename
  2227.                                 if (a) {
  2228.                                   char tempo2[HTS_URLMAXSIZE*2];
  2229.                                   strcpybuff(tempo2,a+1);         // FICHIER
  2230.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2231.                                   strcpybuff(tempo,tempo2);                     // fichier
  2232.                                 }
  2233.                               }
  2234.                               
  2235.                               // écrire codebase="chemin"
  2236.                               if ((opt->getmode & 1) && (ptr>0)) {
  2237.                                 char tempo4[HTS_URLMAXSIZE*2];
  2238.                                 tempo4[0]='\0';
  2239.                                 
  2240.                                 if (strnotempty(tempo_pat)) {
  2241.                                   HT_ADD("codebase=\"http://");
  2242.                                   if (!opt->passprivacy) {
  2243.                                     HT_ADD(adr);  // Password
  2244.                                   } else {
  2245.                                     HT_ADD(jump_identification(adr));  // No Password
  2246.                                   }
  2247.                                   if (*tempo_pat!='/') HT_ADD("/");
  2248.                                   HT_ADD(tempo_pat);
  2249.                                   HT_ADD("\" ");
  2250.                                 }
  2251.                                 
  2252.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2253.                                 HT_ADD(tempo4);    // refresh code="
  2254.                                 HT_ADD(tempo);
  2255.                               }
  2256.                             }
  2257.                           }
  2258.                         }
  2259.                         lastsaved=eadr-1;
  2260.                       }
  2261.                       /*
  2262.                       else if (opt->urlmode==1) {    // ABSOLU, c'est le cas le moins courant
  2263.                       //  NE FONCTIONNE PAS!!  (et est inutile)
  2264.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2265.                       // Θcrire le lien modifiΘ, absolu
  2266.                       HT_ADD("file:");
  2267.                       if (*save=='/')
  2268.                       HT_ADD(save+1)
  2269.                       else
  2270.                       HT_ADD(save)
  2271.                       }
  2272.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2273.                       }
  2274.                       */
  2275.                       else if (opt->urlmode==3) {    // URI absolue /
  2276.                         if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2277.                           HT_ADD(fil);
  2278.                         }
  2279.                         lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2280.                       }
  2281.                       else if (opt->urlmode==2) {  // RELATIF
  2282.                         char tempo[HTS_URLMAXSIZE*2];
  2283.                         tempo[0]='\0';
  2284.                         // calculer le lien relatif
  2285.                         
  2286.                         if (lienrelatif(tempo,save,savename)==0) {
  2287.                           if (!no_esc_utf)
  2288.                             escape_uri(tempo);     // escape with %xx
  2289.                           else
  2290.                             escape_uri_utf(tempo);     // escape with %xx
  2291.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  2292.                             fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
  2293.                             test_flush;
  2294.                           }
  2295.                           
  2296.                           // lien applet (code) - il faut placer un codebase avant
  2297.                           if (p_type==-1) {  // que le nom de fichier
  2298.                             
  2299.                             if (p_flush) {
  2300.                               char tempo_pat[HTS_URLMAXSIZE*2];
  2301.                               tempo_pat[0]='\0';
  2302.                               {
  2303.                                 char* a=strrchr(tempo,'/');
  2304.                                 
  2305.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2306.                                 // we have to do the contrary now
  2307.                                 if (add_class_dots_to_patch>0) {
  2308.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2309.                                     *a='.';     // convert "false" java / into .
  2310.                                     add_class_dots_to_patch--;
  2311.                                     a=strrchr(tempo,'/');
  2312.                                   }
  2313.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2314.                                   if (add_class_dots_to_patch) {
  2315.                                     if (opt->errlog) {
  2316.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2317.                                       test_flush;
  2318.                                     }
  2319.                                   }
  2320.                                 }
  2321.                                 
  2322.                                 if (a) {
  2323.                                   char tempo2[HTS_URLMAXSIZE*2];
  2324.                                   strcpybuff(tempo2,a+1);
  2325.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2326.                                   strcpybuff(tempo,tempo2);                     // fichier
  2327.                                 }
  2328.                               }
  2329.                               
  2330.                               // écrire codebase="chemin"
  2331.                               if ((opt->getmode & 1) && (ptr>0)) {
  2332.                                 char tempo4[HTS_URLMAXSIZE*2];
  2333.                                 tempo4[0]='\0';
  2334.                                 
  2335.                                 if (strnotempty(tempo_pat)) {
  2336.                                   HT_ADD("codebase=\"");
  2337.                                   HT_ADD(tempo_pat);
  2338.                                   HT_ADD("\" ");
  2339.                                 }
  2340.                                 
  2341.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2342.                                 HT_ADD(tempo4);    // refresh code="
  2343.                               }
  2344.                             }
  2345.                             //lastsaved=adr;    // dernier Θcrit+1
  2346.                           }                              
  2347.                           
  2348.                           if ((opt->getmode & 1) && (ptr>0)) {
  2349.                             // écrire le lien modifié, relatif
  2350.                             HT_ADD(tempo);
  2351.                             
  2352.                             // Add query-string, for informational purpose only
  2353.                             // Useless, because all parameters-pages are saved into different targets
  2354.                             if (opt->includequery) {
  2355.                               char* a=strchr(lien,'?');
  2356.                               if (a) {
  2357.                                 HT_ADD(a);
  2358.                               }
  2359.                             }
  2360.                           }
  2361.                           lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2362.                         } else {
  2363.                           if (opt->errlog) {
  2364.                             fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,savename);
  2365.                             test_flush;
  2366.                           }
  2367.                         }
  2368.                       }  // sinon le lien sera Θcrit normalement
  2369.                       
  2370.                       
  2371. #if 0
  2372.                       if (fexist(save)) {    // le fichier existe..
  2373.                         adr[0]='\0';
  2374.                         //if ((opt->debug>0) && (opt->log!=NULL)) {
  2375.                         if (opt->errlog) {
  2376.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save);
  2377.                           test_flush;
  2378.                         }
  2379.                       }
  2380. #endif                            
  2381.                       
  2382.                       /* Security check */
  2383.                       if (strlen(save) >= HTS_URLMAXSIZE) {
  2384.                         adr[0]='\0';
  2385.                         if (opt->errlog) {
  2386.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save);
  2387.                           test_flush;
  2388.                         }
  2389.                       }
  2390.                       
  2391.                       if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) {  // si le fichier n'existe pas, ajouter α la liste                            
  2392.                         // n'y a-t-il pas trop de liens?
  2393.                         if (lien_tot+1 >= lien_max-4) {    // trop de liens!
  2394.                           printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
  2395.                           if (opt->errlog) {
  2396.                             fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
  2397.                             fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
  2398.                             test_flush;
  2399.                           }
  2400.                           if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2401.                           XH_uninit;   // dΘsallocation mΘmoire & buffers
  2402.                           return -1;
  2403.                           
  2404.                         } else {    // noter le lien sur la listes des liens α charger
  2405.                           int pass_fix,dejafait=0;
  2406.                           
  2407.                           // Calculer la priorité de ce lien
  2408.                           if ((opt->getmode & 4)==0) {    // traiter html aprΦs
  2409.                             pass_fix=0;
  2410.                           } else {    // vΘrifier que ce n'est pas un !html
  2411.                             if (!ishtml(fil))
  2412.                               pass_fix=1;        // prioritΘ infΘrieure (traiter aprΦs)
  2413.                             else
  2414.                               pass_fix=max(0,numero_passe);    // prioritΘ normale
  2415.                           }
  2416.                           
  2417.                           /* If the file seems to be an html file, get depth-1 */
  2418.                           /*
  2419.                           if (strnotempty(save)) {
  2420.                           if (ishtml(save) == 1) {
  2421.                           // descore_prio = 2;
  2422.                           } else {
  2423.                           // descore_prio = 1;
  2424.                           }
  2425.                           }
  2426.                           */
  2427.                           
  2428.                           // vΘrifier que le lien n'a pas dΘja ΘtΘ notΘ
  2429.                           // si c'est le cas, alors il faut s'assurer que la prioritΘ associΘe
  2430.                           // au fichier est la plus grande des deux prioritΘs
  2431.                           //
  2432.                           // On part de la fin et on essaye de se presser (Θconomise temps machine)
  2433. #if HTS_HASH
  2434.                           {
  2435.                             int i=hash_read(hash,save,"",0);      // lecture type 0 (sav)
  2436.                             if (i>=0) {
  2437.                               liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2438.                               dejafait=1;
  2439.                             }
  2440.                           }
  2441. #else
  2442.                           {
  2443.                             int l;
  2444.                             int i;
  2445.                             l=strlen(save);  // opti
  2446.                             for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
  2447.                               if (liens[i]->sav_len==l) {    // mΩme taille de chaεne
  2448.                                 if (strcmp(liens[i]->sav,save)==0) {    // existe dΘja
  2449.                                   liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2450.                                   dejafait=1;
  2451.                                 }
  2452.                               }
  2453.                             }
  2454.                           }
  2455. #endif
  2456.                           
  2457.                           // le lien n'a jamais ΘtΘ crΘΘ.
  2458.                           // cette fois ci, on le crΘe!
  2459.                           if (!dejafait) {                                
  2460.                             //
  2461.                             // >>>> CREER LE LIEN <<<<
  2462.                             //
  2463.                             // enregistrer lien α charger
  2464.                             //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
  2465.                             // mΩme adresse: l'objet pΦre est l'objet pΦre de l'actuel
  2466.                             
  2467.                             // DEBUT ROBOTS.TXT AJOUT
  2468.                             if (!just_test_it) {
  2469.                               if (
  2470.                                 (!strfield(adr,"ftp://"))         // non ftp
  2471.                                 && (!strfield(adr,"file://")) ) {    // non file
  2472.                                 if (opt->robots) {    // rΘcupΘrer robots
  2473.                                   if (ishtml(fil)!=0) {                       // pas la peine pour des fichiers isolΘs
  2474.                                     if (checkrobots(robots,adr,"") != -1) {    // robots.txt ?
  2475.                                       checkrobots_set(robots,adr,"");          // ajouter entrΘe vide
  2476.                                       if (checkrobots(robots,adr,"") == -1) {    // robots.txt ?
  2477.                                         // enregistrer robots.txt (MACRO)
  2478.                                         liens_record(adr,"/robots.txt","","","");
  2479.                                         if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2480.                                           printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2481.                                           if (opt->errlog) { 
  2482.                                             fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2483.                                             test_flush;
  2484.                                           }
  2485.                                           if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2486.                                           XH_uninit;    // dΘsallocation mΘmoire & buffers
  2487.                                           return -1;
  2488.                                         }  
  2489.                                         liens[lien_tot]->testmode=0;          // pas mode test
  2490.                                         liens[lien_tot]->link_import=0;       // pas mode import     
  2491.                                         liens[lien_tot]->premier=lien_tot;
  2492.                                         liens[lien_tot]->precedent=ptr;
  2493.                                         liens[lien_tot]->depth=0;
  2494.                                         liens[lien_tot]->pass2=max(0,numero_passe);
  2495.                                         liens[lien_tot]->retry=0;
  2496.                                         lien_tot++;  // UN LIEN DE PLUS
  2497. #if DEBUG_ROBOTS
  2498.                                         printf("robots.txt: added file robots.txt for %s\n",adr);
  2499. #endif
  2500.                                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2501.                                           fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
  2502.                                           test_flush;
  2503.                                         }
  2504.                                       } else {
  2505.                                         if (opt->errlog) {   
  2506.                                           fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
  2507.                                           test_flush;
  2508.                                         }
  2509.                                       }
  2510.                                     }
  2511.                                   }
  2512.                                 }
  2513.                               }
  2514.                             }
  2515.                             // FIN ROBOTS.TXT AJOUT
  2516.                             
  2517.                             // enregistrer (MACRO)
  2518.                             liens_record(adr,fil,save,former_adr,former_fil);
  2519.                             if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2520.                               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2521.                               if (opt->errlog) { 
  2522.                                 fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2523.                                 test_flush;
  2524.                               }
  2525.                               if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2526.                               XH_uninit;    // dΘsallocation mΘmoire & buffers
  2527.                               return -1;
  2528.                             }  
  2529.                             
  2530.                             // mode test?
  2531.                             if (!just_test_it)
  2532.                               liens[lien_tot]->testmode=0;          // pas mode test
  2533.                             else
  2534.                               liens[lien_tot]->testmode=1;          // mode test
  2535.                             if (!import_done)
  2536.                               liens[lien_tot]->link_import=0;       // pas mode import
  2537.                             else
  2538.                               liens[lien_tot]->link_import=1;       // mode import
  2539.                             // Θcrire autres paramΦtres de la structure-lien
  2540.                             if ((meme_adresse) && (!import_done) && (liens[ptr]->premier != 0))
  2541.                               liens[lien_tot]->premier=liens[ptr]->premier;
  2542.                             else    // sinon l'objet pΦre est le prΘcΘdent lui mΩme
  2543.                               liens[lien_tot]->premier=lien_tot;
  2544.                             // liens[lien_tot]->premier=ptr;
  2545.                             
  2546.                             liens[lien_tot]->precedent=ptr;
  2547.                             // noter la prioritΘ
  2548.                             if (!set_prio_to)
  2549.                               liens[lien_tot]->depth=liens[ptr]->depth - 1;
  2550.                             else
  2551.                               liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1));         // PRIORITE NULLE (catch page)
  2552.                             // noter pass
  2553.                             liens[lien_tot]->pass2=pass_fix;
  2554.                             liens[lien_tot]->retry=opt->retry;
  2555.                             
  2556.                             //strcpybuff(liens[lien_tot]->adr,adr);
  2557.                             //strcpybuff(liens[lien_tot]->fil,fil);
  2558.                             //strcpybuff(liens[lien_tot]->sav,save); 
  2559.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2560.                               if (!just_test_it) {
  2561.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
  2562.                               } else {
  2563.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
  2564.                               }
  2565.                               test_flush;
  2566.                             }
  2567.                             
  2568.                             lien_tot++;  // UN LIEN DE PLUS
  2569.                           } else { // if !dejafait
  2570.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2571.                               fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save);
  2572.                               test_flush;
  2573.                             }
  2574.                             
  2575.                           }
  2576.                           
  2577.                           
  2578.                         }   // si pas trop de liens
  2579.                       }   // si adr[0]!='\0'
  2580.                       
  2581.                       
  2582.                     }  // if adr[0]!='\0' 
  2583.                     
  2584.                   }  // if adr[0]!='\0'
  2585.                   
  2586.                 }    // if strlen(lien)>0
  2587.                 
  2588.               }   // if ok==0      
  2589.               
  2590.               adr=eadr-1;  // ** sauter
  2591.               
  2592.             }  // if (p) 
  2593.             
  2594.           }  // si '<' ou '>'
  2595.           
  2596.           // plus loin
  2597.           adr++;
  2598.           
  2599.           
  2600.           /* Otimization: if we are scanning in HTML data (not in tag or script), 
  2601.           then jump to the next starting tag */
  2602.           if (ptr>0) {
  2603.             if ( (!intag)         /* Not in tag */
  2604.               && (!inscript)      /* Not in (java)script */
  2605.               && (!incomment)     /* Not in comment (<!--) */
  2606.               && (!inscript_tag)  /* Not in tag with script inside */
  2607.               ) 
  2608.             {
  2609.               /* Not at the end */
  2610.               if (( ((int) (adr - r->adr)) ) < r->size) {
  2611.                 /* Not on a starting tag yet */
  2612.                 if (*adr != '<') {
  2613.                   char* adr_next = strchr(adr,'<');
  2614.                   /* Jump to near end (index hack) */
  2615.                   if (!adr_next) {
  2616.                     if (
  2617.                       ( (int)(adr - r->adr) < (r->size - 4)) 
  2618.                       &&
  2619.                       (r->size > 4)
  2620.                       ) {
  2621.                       adr = r->adr + r->size - 2;
  2622.                     }
  2623.                   } else {
  2624.                     adr = adr_next;
  2625.                   }
  2626.                 }
  2627.               }
  2628.             }
  2629.           }
  2630.           
  2631.           // ----------
  2632.           // Θcrire peu α peu
  2633.           if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
  2634.           lastsaved=adr;    // dernier Θcrit+1
  2635.           // ----------
  2636.           
  2637.           // pour les stats du shell si parsing trop long
  2638. #if HTS_ANALYSTE
  2639.           if (r->size)
  2640.             _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
  2641.           if (_hts_in_html_poll) {
  2642.             _hts_in_html_poll=0;
  2643.             // temps α attendre, et remplir autant que l'on peut le cache (backing)
  2644.             back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  2645.             back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  2646.             
  2647.             // Transfer rate
  2648.             engine_stats();
  2649.             
  2650.             // Refresh various stats
  2651.             HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  2652.             HTS_STAT.stat_errors=fspc(NULL,"error");
  2653.             HTS_STAT.stat_warnings=fspc(NULL,"warning");
  2654.             HTS_STAT.stat_infos=fspc(NULL,"info");
  2655.             HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  2656.             HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  2657.             
  2658.             if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  2659.               if (opt->errlog) {
  2660.                 fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  2661.                 test_flush;
  2662.               } 
  2663.               *stre->exit_xh_=1;  // exit requested
  2664.               XH_uninit;
  2665.               return -1;
  2666.               //adr = r->adr + r->size;  // exit
  2667.             } else if (_hts_cancel==1) {
  2668.               // adr = r->adr + r->size;  // exit
  2669.               nofollow=1;               // moins violent
  2670.               _hts_cancel=0;
  2671.             }
  2672.           }
  2673.           
  2674.           // refresh the backing system each 2 seconds
  2675.           if (engine_stats()) {
  2676.             back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  2677.             back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  2678.           }
  2679. #endif
  2680.         } while(( ((int) (adr - r->adr)) ) < r->size);
  2681. #if HTS_ANALYSTE
  2682.         _hts_in_html_parsing=0;  // flag
  2683.         _hts_cancel=0;           // pas de cancel
  2684. #endif
  2685.         if ((opt->getmode & 1) && (ptr>0)) {
  2686.           HT_ADD_END;    // achever
  2687.         }
  2688.         //
  2689.         //
  2690.         //
  2691.       }  // if !error
  2692.       
  2693.       
  2694.       if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  2695.       // sauver fichier
  2696.       //structcheck(savename);
  2697.       //filesave(r->adr,r->size,savename);
  2698.       
  2699. #if HTS_ANALYSTE
  2700.     }  // analyse OK
  2701. #endif
  2702.  
  2703.     /* Apply changes */
  2704.     ENGINE_SAVE_CONTEXT();
  2705.     
  2706.     return 0;
  2707. }
  2708.  
  2709.  
  2710.  
  2711.  
  2712. /*
  2713.   Check redirect status codes 301, 302, 303, 307 (moved); also handles 412/416 and other non-200 statuses
  2714. */
  2715. int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  2716.   /* Load engine variables */
  2717.   ENGINE_LOAD_CONTEXT();  
  2718.   
  2719.   // DEBUT rattrapage des 301,302,307..
  2720.   // ------------------------------------------------------------
  2721.   if (!error) {
  2722.     ////////{
  2723.     // on a chargΘ un fichier en plus
  2724.     // if (!error) stat_loaded+=r.size;
  2725.     
  2726.     // ------------------------------------------------------------
  2727.     // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing 
  2728.     // ------------------------------------------------------------
  2729.     if ( (r->statuscode==301) 
  2730.       || (r->statuscode==302)
  2731.       || (r->statuscode==303)
  2732.       || (r->statuscode==307)
  2733.       ) {          
  2734.       //if (r->adr!=NULL) {   // adr==null si fichier direct. [catch: davename normalement si cgi]
  2735.       //int i=0;
  2736.       char *rn=NULL;
  2737.       // char* p;
  2738.       
  2739.       if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  2740.         //if (opt->errlog) {
  2741.         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
  2742.         test_flush;
  2743.       }
  2744.       
  2745.       
  2746.       {
  2747.         char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
  2748.         int get_it=0;         // ne pas prendre le fichier α la mΩme adresse par dΘfaut
  2749.         int reponse=0;
  2750.         mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
  2751.         //
  2752.         
  2753.         strcpybuff(mov_url,r->location);
  2754.         
  2755.         // url qque -> adresse+fichier
  2756.         if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {                        
  2757.           int set_prio_to=0;    // pas de priotitΘ fixΘd par wizard
  2758.           
  2759.           //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) {    // ok URL reconnue
  2760.           // c'est (en gros) la mΩme URL..
  2761.           // si c'est un problΦme de casse dans le host c'est que le serveur est buggΘ
  2762.           // ("RFC says.." : host name IS case insensitive)
  2763.           if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identique α casse prΦs
  2764.             // on tourne en rond
  2765.             if (strcmp(mov_fil,urlfil)==0) {
  2766.               error=1;
  2767.               get_it=-1;        // ne rien faire
  2768.               if (opt->errlog) {
  2769.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
  2770.                 test_flush;
  2771.               }
  2772.             } else {    // mauvaise casse, effacer entrΘe dans la pile et rejouer une fois
  2773.               get_it=1;
  2774.             }
  2775.           } else {        // adresse diffΘrente
  2776.             if (ishtml(mov_url)==0) {   // pas mΩme adresse MAIS c'est un fichier non html (pas de page moved possible)
  2777.               // -> on prend α cette adresse, le lien sera enregistrΘ avec lien_record() (hash)
  2778.               if ((opt->debug>1) && (opt->log!=NULL)) {
  2779.                 fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
  2780.                 test_flush;
  2781.               }
  2782.               // acceptΘ?
  2783.               if (hts_acceptlink(opt,ptr,lien_tot,liens,
  2784.                 mov_adr,mov_fil,
  2785.                 &filters,&filptr,opt->maxfilter,
  2786.                 robots,
  2787.                 &set_prio_to,
  2788.                 NULL) != 1) {                /* nouvelle adresse non refusΘe ? */
  2789.                 get_it=1;
  2790.                 if ((opt->debug>1) && (opt->log!=NULL)) {
  2791.                   fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
  2792.                   test_flush;
  2793.                 }
  2794.               }
  2795.             } /* sinon traitΘ normalement */
  2796.           }
  2797.           
  2798.           //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identique α casse prΦs
  2799.           if (get_it==1) {
  2800.             // court-circuiter le reste du traitement
  2801.             // et reculer pour mieux sauter
  2802.             if (opt->errlog) {
  2803.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
  2804.               test_flush;
  2805.             }          
  2806.             // canceller lien actuel
  2807.             error=1;
  2808.             strcpybuff(liens[ptr]->adr,"!");  // caractΦre bidon (invalide hash)
  2809. #if HTS_HASH
  2810. #else
  2811.             liens[ptr]->sav_len=-1;       // taille invalide
  2812. #endif
  2813.             // noter NOUVEAU lien
  2814.             //xxc xxc
  2815.             //  set_prio_to=0+1;  // protection if the moved URL is an html page!!
  2816.             //xxc xxc
  2817.             {
  2818.               char mov_sav[HTS_URLMAXSIZE*2];
  2819.               // calculer lien et Θventuellement modifier addresse/fichier
  2820.               if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  2821.                 if (hash_read(hash,mov_sav,"",0)<0) {      // n'existe pas dΘja
  2822.                   // enregistrer lien (MACRO) avec SAV IDENTIQUE
  2823.                   liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
  2824.                   //liens_record(mov_adr,mov_fil,mov_sav,"","");
  2825.                   if (liens[lien_tot]!=NULL) {    // OK, pas d'erreur
  2826.                     // mode test?
  2827.                     liens[lien_tot]->testmode=liens[ptr]->testmode;
  2828.                     liens[lien_tot]->link_import=0;       // mode normal
  2829.                     if (!set_prio_to)
  2830.                       liens[lien_tot]->depth=liens[ptr]->depth;
  2831.                     else
  2832.                       liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth));       // PRIORITE NULLE (catch page)
  2833.                     liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  2834.                     liens[lien_tot]->retry=liens[ptr]->retry;
  2835.                     liens[lien_tot]->premier=liens[ptr]->premier;
  2836.                     liens[lien_tot]->precedent=liens[ptr]->precedent;
  2837.                     lien_tot++;
  2838.                   } else {  // oups erreur, plus de mΘmoire!!
  2839.                     printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2840.                     if (opt->errlog) {
  2841.                       fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2842.                       test_flush;
  2843.                     }
  2844.                     //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  2845.                     XH_uninit;    // dΘsallocation mΘmoire & buffers
  2846.                     return 0;
  2847.                   }
  2848.                 } else {
  2849.                   if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  2850.                     fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
  2851.                     test_flush;
  2852.                   }
  2853.                 }
  2854.                 
  2855.               }
  2856.             }
  2857.             
  2858.             //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
  2859.             
  2860.             // note mΘtaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
  2861.             // sous DOS ca marche pas trΦs bien... mais comme je suis gΘnial url_savename()
  2862.             // est α mΩme de rΘgler ce problΦme
  2863.           }
  2864.             } // ident_url_xx
  2865.             
  2866.             if (get_it==0) {    // adresse vraiment diffΘrente et potentiellement en html (pas de possibilitΘ de bouger la page tel quel α cause des <img src..> et cie)
  2867.               rn=(char*) calloct(8192,1);
  2868.               if (rn!=NULL) {
  2869.                 if (opt->errlog) {
  2870.                   fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
  2871.                   test_flush;
  2872.                 }
  2873.                 escape_uri(mov_url);
  2874.                 // On prΘpare une page qui sautera immΘdiatement sur la bonne URL
  2875.                 // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
  2876.                 strcpybuff(rn,"<HTML>"CRLF);
  2877.                 strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  2878.                 strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
  2879.                 strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
  2880.                 strcatbuff(rn,mov_url);    // URL
  2881.                 strcatbuff(rn,"\">"CRLF);
  2882.                 strcatbuff(rn,"<A HREF=\"");
  2883.                 strcatbuff(rn,mov_url);
  2884.                 strcatbuff(rn,"\">");
  2885.                 strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
  2886.                 strcatbuff(rn,"</BODY>"CRLF);
  2887.                 strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  2888.                 strcatbuff(rn,"</HTML>"CRLF);
  2889.                 
  2890.                 // changer la page
  2891.                 if (r->adr) { freet(r->adr); r->adr=NULL; }
  2892.                 r->adr=rn;
  2893.                 r->size=strlen(r->adr);
  2894.                 strcpybuff(r->contenttype,"text/html");
  2895.               }
  2896.             }  // get_it==0
  2897.             
  2898.           }     // bloc
  2899.           // erreur HTTP (ex: 404, not found)
  2900.         } else if (
  2901.           (r->statuscode==412)
  2902.           || (r->statuscode==416)
  2903.           ) {    // Precondition Failed, c'est α dire pour nous redemander TOUT le fichier
  2904.           if (fexist(liens[ptr]->sav)) {
  2905.             remove(liens[ptr]->sav);    // Eliminer
  2906.             if (!fexist(liens[ptr]->sav)) {  // Bien ΘliminΘ? (sinon on boucle..)
  2907. #if HDEBUG
  2908.               printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
  2909. #endif
  2910.               if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
  2911.                 //if (opt->errlog) {
  2912.                 fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil);
  2913.                 test_flush;
  2914.               }
  2915.               // enregistrer le MEME lien (MACRO)
  2916.               liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
  2917.               if (liens[lien_tot]!=NULL) {    // OK, pas d'erreur
  2918.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // mode test?
  2919.                 liens[lien_tot]->link_import=0;       // pas mode import
  2920.                 liens[lien_tot]->depth=liens[ptr]->depth;
  2921.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  2922.                 liens[lien_tot]->retry=liens[ptr]->retry;
  2923.                 liens[lien_tot]->premier=liens[ptr]->premier;
  2924.                 liens[lien_tot]->precedent=ptr;
  2925.                 lien_tot++;
  2926.                 //
  2927.                 // canceller lien actuel
  2928.                 error=1;
  2929.                 strcpybuff(liens[ptr]->adr,"!");  // caractΦre bidon (invalide hash)
  2930. #if HTS_HASH
  2931. #else
  2932.                 liens[ptr]->sav_len=-1;       // taille invalide
  2933. #endif
  2934.                 //
  2935.               } else {  // oups erreur, plus de mΘmoire!!
  2936.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2937.                 if (opt->errlog) {
  2938.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2939.                   test_flush;
  2940.                 }
  2941.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  2942.                 XH_uninit;    // dΘsallocation mΘmoire & buffers
  2943.                 return 0;
  2944.               } 
  2945.             } else {
  2946.               if (opt->errlog!=NULL) {
  2947.                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil);
  2948.                 test_flush;
  2949.               }
  2950.             }
  2951.           } else {
  2952.             if (opt->errlog!=NULL) {
  2953.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil);
  2954.               test_flush;
  2955.             }
  2956.           }
  2957.         } else if (r->statuscode!=200) {
  2958.           int can_retry=0;
  2959.           
  2960.           // cas o∙ l'on peut reessayer
  2961.           // -2=timeout -3=rateout (interne α httrack)
  2962.           switch(r->statuscode) {
  2963.             //case -1: can_retry=1; break;
  2964.           case -2: if (opt->hostcontrol) {    // timeout et retry ΘpuisΘs
  2965.             if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) {
  2966.               if ((opt->debug>1) && (opt->log!=NULL)) {
  2967.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  2968.               }
  2969.               host_ban(opt,liens,ptr,lien_tot,back,back_max,filters,opt->maxfilter,&filptr,jump_identification(urladr));
  2970.               if ((opt->debug>1) && (opt->log!=NULL)) {
  2971.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  2972.               }
  2973.             } else can_retry=1;
  2974.                    } else can_retry=1;
  2975.             break;
  2976.           case -3: if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) {    // too slow
  2977.             if (opt->hostcontrol & 2) {
  2978.               if ((opt->debug>1) && (opt->log!=NULL)) {
  2979.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  2980.               }
  2981.               host_ban(opt,liens,ptr,lien_tot,back,back_max,filters,opt->maxfilter,&filptr,jump_identification(urladr));
  2982.               if ((opt->debug>1) && (opt->log!=NULL)) {
  2983.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  2984.               }
  2985.             } else can_retry=1;
  2986.                    } else can_retry=1;
  2987.             break;
  2988.           case -4:            // connect closed
  2989.             can_retry=1;
  2990.             break;
  2991.           case -5:            // other (non fatal) error
  2992.             can_retry=1;
  2993.             break;
  2994.           case -6:            // bad SSL handskake
  2995.             can_retry=1;
  2996.             break;
  2997.           case 408: case 409: case 500: case 502: case 504: can_retry=1;
  2998.             break;
  2999.           }
  3000.           
  3001.           if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) {  // no primary (internal page 0)
  3002.             if ((liens[ptr]->retry<=0) || (!can_retry) ) {  // retry ΘpuisΘs (ou retry impossible)
  3003.               if (opt->errlog) {
  3004.                 if ((opt->retry>0) && (can_retry)){
  3005.                   fspc(opt->errlog,"error"); 
  3006.                   fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3007.                 } else {
  3008.                   if (r->statuscode==-10) {    // test OK
  3009.                     if ((opt->debug>0) && (opt->errlog!=NULL)) {
  3010.                       fspc(opt->errlog,"info"); 
  3011.                       fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3012.                     }
  3013.                   } else {
  3014.                     if (strcmp(urlfil,"/robots.txt")) {       // ne pas afficher d'infos sur robots.txt par dΘfaut
  3015.                       fspc(opt->errlog,"error"); 
  3016.                       fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3017.                     } else {
  3018.                       if (opt->debug>1) {
  3019.                         fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr);
  3020.                         test_flush;
  3021.                       }
  3022.                     }
  3023.                   }
  3024.                 }
  3025.                 test_flush;
  3026.               }
  3027.               
  3028.               // NO error in trop level
  3029.               // due to the "no connection -> previous restored" hack
  3030.               // This prevent the engine from wiping all data if the website has been deleted (or moved)
  3031.               // since last time (which is quite annoying)
  3032.               if (liens[ptr]->precedent != 0) {
  3033.                 // ici on teste si on doit enregistrer la page tout de mΩme
  3034.                 if (opt->errpage) {
  3035.                   store_errpage=1;
  3036.                 }
  3037.               } else {
  3038.                 if (strcmp(urlfil,"/robots.txt") != 0) {
  3039.                 /*
  3040.                 This is an error caused by a link entered by the user
  3041.                 That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.)
  3042.                 If all links entered are invalid, the session failed and we will attempt to restore
  3043.                 the previous one
  3044.                 Example: Try to update a website which has been deleted remotely: this may delete
  3045.                 the website locally, which is really not desired (especially if the website disappeared!)
  3046.                 With this hack, the engine won't wipe local files (how clever)
  3047.                   */
  3048.                   HTS_STAT.stat_errors_front++;
  3049.                 }
  3050.               }
  3051.               
  3052.             } else {    // retry!!
  3053.               if (opt->debug>0 && opt->errlog != NULL) {  // on fera un alert si le retry Θchoue               
  3054.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3055.                 test_flush;
  3056.               }
  3057.               // redemander fichier
  3058.               liens_record(urladr,urlfil,savename,"","");
  3059.               if (liens[lien_tot]!=NULL) {    // OK, pas d'erreur
  3060.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // mode test?
  3061.                 liens[lien_tot]->link_import=0;       // pas mode import
  3062.                 liens[lien_tot]->depth=liens[ptr]->depth;
  3063.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3064.                 liens[lien_tot]->retry=liens[ptr]->retry-1;    // moins 1 retry!
  3065.                 liens[lien_tot]->premier=liens[ptr]->premier;
  3066.                 liens[lien_tot]->precedent=liens[ptr]->precedent;
  3067.                 lien_tot++;
  3068.               } else {  // oups erreur, plus de mΘmoire!!
  3069.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3070.                 if (opt->errlog) {
  3071.                   fspc(opt->errlog,"panic"); 
  3072.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3073.                   test_flush;
  3074.                 }
  3075.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3076.                 XH_uninit;    // dΘsallocation mΘmoire & buffers
  3077.                 return 0;
  3078.               } 
  3079.             }
  3080.           } else {
  3081.             if (opt->errlog) {
  3082.               if (opt->debug>1) {
  3083.                 fspc(opt->errlog,"info"); 
  3084.                 fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
  3085.               }
  3086.             }
  3087.           }
  3088.           if (!store_errpage) {
  3089.             if (r->adr) { freet(r->adr); r->adr=NULL; }  // dΘsalloc
  3090.             error=1;  // erreur!
  3091.           }
  3092.         }
  3093.         // FIN rattrapage des 301,302,307..
  3094.         // ------------------------------------------------------------
  3095.         
  3096.       }  // if !error
  3097.       
  3098.       
  3099.       /* Apply changes */
  3100.       ENGINE_SAVE_CONTEXT();
  3101.       
  3102.       return 0;
  3103.       
  3104.       
  3105. }
  3106.  
  3107.  
  3108.  
  3109. /*
  3110.   Wait until the current link (urladr, urlfil, savename) has finished loading on the backing stack.
          Steps performed, in order:
          - pause handling: a pause is triggered when "hts-stop.lock" exists in opt->path_log (and could be
            removed), or when more than opt->fragment bytes were received since the last pause; running
            transfers are drained, "hts-paused.lock" is written, and the function waits until that file is
            gone (or calls hts_htmlcheck_pause() when HTS_ANALYSTE is set)
          - HTS_ANALYSTE only: links listed in _hts_addurl are recorded, and the function idles while
            _hts_setpause is set
          - the current link is added to the backing stack if missing (back_add), free sockets are filled
            (back_fillmax), then the wait loop runs while the entry's status is > 0
          - on completion back[b].r is copied into *r (location is copied into stre->loc_), back[b].r.adr is
            cleared and the slot is released with back_maydelete()
          Returns 2 when the link is no longer on the backing stack after the wait loop; returns 0 otherwise,
          including the early exits where *stre->exit_xh_ is set to 1 or a link record could not be allocated.
          NOTE(review): opt, back, liens, ptr, r, etc. are presumably provided by ENGINE_LOAD_CONTEXT() (macro
          not visible here); the early "return 0" paths do not call ENGINE_SAVE_CONTEXT() -- confirm intended.
  3111. */
  3112. int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  3113.   /* Load engine variables */
  3114.   ENGINE_LOAD_CONTEXT();
  3115.   /* */
  3116.   int b;
  3117.   int n;
  3118.   
  3119. #if BDEBUG==1
  3120.   printf("\nBack test..\n");
  3121. #endif
  3122.   
  3123.   // pause/lock files
  3124.   {
  3125.     int do_pause=0;
  3126.     
  3127.     // user pause lockfile: the presence of hts-stop.lock in the log directory requests a pause
  3128.     if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3129.       // remove lockfile; the pause is only taken if the file is really gone afterwards
  3130.       remove(fconcat(opt->path_log,"hts-stop.lock"));
  3131.       if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3132.         do_pause=1;
  3133.       }
  3134.     }
  3135.     
  3136.     // pause after receiving more than opt->fragment bytes since the last pause
  3137.     if (opt->fragment>0) {
  3138.       if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) {
  3139.         do_pause=1;
  3140.       }
  3141.     }
  3142.     
  3143.     // pause?
  3144.     if (do_pause) {
  3145.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3146.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF);
  3147.       }
  3148.       while (back_nsoc(back,back_max)>0) {                  // wait for the transfers to finish
  3149.         back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3150.         Sleep(200);
  3151. #if HTS_ANALYSTE
  3152.         {
  3153.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3154.           
  3155.           // Transfer rate
  3156.           engine_stats();
  3157.           
  3158.           // Refresh various stats
  3159.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3160.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3161.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3162.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3163.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3164.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3165.           
  3166.           b=0;
  3167.           if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3168.             if (opt->errlog) {
  3169.               fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3170.               test_flush;
  3171.             }
  3172.             *stre->exit_xh_=1;  // exit requested
  3173.             XH_uninit;
  3174.             return 0;
  3175.           }
  3176.         }
  3177. #endif
  3178.       }
  3179.       // Release the buffer that records the created paths, in case the user runs
  3180.       // an rm -r after making a tar while the engine is paused
  3181.       structcheck_init(1);
  3182.       {
  3183.         FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb");
  3184.         if (fp) {
  3185.           fspc(fp,"info");  // date-stamp the file
  3186.           fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes);
  3187.           fclose(fp);
  3188.         }
  3189.       }
  3190.       stat_fragment=HTS_STAT.stat_bytes;
  3191.       /* Info for wrappers */
  3192.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3193.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock"));
  3194.       }
  3195. #if HTS_ANALYSTE
  3196.       hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock"));
  3197. #else
  3198.       while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) {
  3199.         //back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);   not needed!! (no more active sockets)
  3200.         Sleep(1000);
  3201.       }
  3202. #endif
  3203.     }
  3204.     //
  3205.   }
  3206.   // end of pause/lock files
  3207.   
  3208. #if HTS_ANALYSTE
  3209.   // preference changes
  3210.   /*
  3211.   if (_hts_setopt) {
  3212.   copy_htsopt(_hts_setopt,opt);    // copy if needed
  3213.   _hts_setopt=NULL;                 // clear callback
  3214.   }
  3215.   */
  3216.   if (_hts_addurl) {
  3217.     char add_adr[HTS_URLMAXSIZE*2];
  3218.     char add_fil[HTS_URLMAXSIZE*2];
  3219.     while(*_hts_addurl) {
  3220.       char add_url[HTS_URLMAXSIZE*2];
  3221.       add_adr[0]=add_fil[0]=add_url[0]='\0';
  3222.       if (!link_has_authority(*_hts_addurl))
  3223.         strcpybuff(add_url,"http://");          // prepend http://
  3224.       strcatbuff(add_url,*_hts_addurl);
  3225.       if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
  3226.         // ----Add----
  3227.         // record NEW link
  3228.         char add_sav[HTS_URLMAXSIZE*2];
  3229.         // compute the save name and possibly modify address/file
  3230.         if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  3231.           if (hash_read(hash,add_sav,"",0)<0) {      // does not exist yet
  3232.             // record link (MACRO)
  3233.             liens_record(add_adr,add_fil,add_sav,"","");
  3234.             if (liens[lien_tot]!=NULL) {    // OK, no error
  3235.               liens[lien_tot]->testmode=0;          // test mode?
  3236.               liens[lien_tot]->link_import=0;       // normal mode
  3237.               liens[lien_tot]->depth=opt->depth;
  3238.               liens[lien_tot]->pass2=max(0,numero_passe);
  3239.               liens[lien_tot]->retry=opt->retry;
  3240.               liens[lien_tot]->premier=lien_tot;
  3241.               liens[lien_tot]->precedent=lien_tot;
  3242.               lien_tot++;
  3243.               //
  3244.               if ((opt->debug>0) && (opt->log!=NULL)) {
  3245.                 fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
  3246.               }
  3247.               //
  3248.             } else {  // oops, error: out of memory!!
  3249.               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3250.               if (opt->errlog) {
                        // NOTE(review): no fspc() prefix is written before this message, unlike the other errlog messages in this function -- confirm
  3251.                 fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3252.                 test_flush;
  3253.               }
  3254.               //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3255.               XH_uninit;    // free memory & buffers
  3256.               return 0;
  3257.             }
  3258.           } else {
  3259.             if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3260.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
  3261.               test_flush;
  3262.             }
  3263.           }
  3264.           
  3265.         }
  3266.       } else {
  3267.         if (opt->errlog) {
  3268.           fspc(opt->errlog,"error");
  3269.           fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url);
  3270.           test_flush;
  3271.         }
  3272.       }
  3273.       // ----End add----
  3274.       _hts_addurl++;                  // next one
  3275.     }
  3276.     _hts_addurl=NULL;           // release _hts_addurl
  3277.   }
  3278.   // if a pause has been requested
  3279.   if (_hts_setpause) {
  3280.     // index of the current link
  3281.     int b=back_index(back,back_max,urladr,urlfil,savename);
  3282.     if (b<0) b=0;    // force a valid index for the stats
  3283.     while(_hts_setpause) {    // pausing..
  3284.       back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3285.       
  3286.       // Transfer rate
  3287.       engine_stats();
  3288.       
  3289.       // Refresh various stats
  3290.       HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3291.       HTS_STAT.stat_errors=fspc(NULL,"error");
  3292.       HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3293.       HTS_STAT.stat_infos=fspc(NULL,"info");
  3294.       HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3295.       HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3296.       
  3297.       if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3298.         if (opt->errlog) {
  3299.           fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3300.           test_flush;
  3301.         }
  3302.         *stre->exit_xh_=1;  // exit requested
  3303.         XH_uninit;
  3304.         return 0;
  3305.       }
  3306.       if (back_nsoc(back,back_max)==0)
  3307.         Sleep(250);  // short pause
  3308.     }
  3309.   }
  3310. #endif
  3311.   
  3312.   // if the file is not on the backing stack, put it there..
  3313.   if (!back_exist(back,back_max,urladr,urlfil,savename)) {
  3314. #if BDEBUG==1
  3315.     printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
  3316. #endif
  3317.     if (back_add(back,back_max,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
  3318.       printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
  3319. #if BDEBUG==1
  3320.       printf("error while crash adding\n");
  3321. #endif
  3322.       if (opt->errlog) {
  3323.         fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
  3324.         test_flush;
  3325.       } 
  3326.       
  3327.     }
  3328.   }
  3329.   
  3330. #if BDEBUG==1
  3331.   printf("test number of socks\n");
  3332. #endif
  3333.   
  3334.   // add as many sockets as can be added
  3335.   n=opt->maxsoc-back_nsoc(back,back_max);
  3336. #if BDEBUG==1
  3337.   printf("%d sockets available for backing\n",n);
  3338. #endif
  3339.   
  3340. #if HTS_ANALYSTE
  3341.   if ((n>0) && (!_hts_setpause)) {   // if sockets are free and not paused, add
  3342. #else
  3343.     if (n>0) {                         // if sockets are free
  3344. #endif
  3345.       // fill the backing stack as much as possible
  3346.       back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3347.     }
  3348.     
  3349.     // index of the current link
  3350.     /*
  3351.     b=back_index(back,back_max,urladr,urlfil,savename);
  3352.     
  3353.       if (b>=0) 
  3354.     */
  3355.     {
  3356.       // ------------------------------------------------------------
  3357.       // wait until the current file is ready - WAIT LOOP
  3358.       do {
  3359.         
  3360.         // index of the current link
  3361.         b=back_index(back,back_max,urladr,urlfil,savename);
  3362. #if BDEBUG==1
  3363.         printf("back index %d, waiting\n",b);
  3364. #endif
  3365.         // Link no longer on the stack: 'continue' jumps to the loop test, which fails for b<0 and ends the wait
  3366.         if (b<0)
  3367.           continue;
  3368.         
  3369.         // Receive data
  3370.         if (back[b].status>0)
  3371.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3372.         
  3373.         // Re-read the index; as above, b<0 ends the wait loop
  3374.         b=back_index(back,back_max,urladr,urlfil,savename);
  3375.         if (b<0)
  3376.           continue;
  3377.         
  3378.         // And fill the backing stack
  3379.         if (back[b].status>0)
  3380.           back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3381.         
  3382.         // Re-read the index; as above, b<0 ends the wait loop
  3383.         b=back_index(back,back_max,urladr,urlfil,savename);
  3384.         if (b<0)
  3385.           continue;
  3386.         
  3387.         // other HTTrack duties: statistics, wait loop, etc.
  3388.         if ((opt->makestat) || (opt->maketrack)) {
  3389.           TStamp l=time_local();
  3390.           if ((int) (l-makestat_time) >= 60) {   
  3391.             if (makestat_fp != NULL) {
  3392.               fspc(makestat_fp,"info");
  3393.               fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
  3394.               fflush(makestat_fp);
  3395.               *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
  3396.               *stre->makestat_lnk_=lien_tot;
  3397.             }
  3398.             if (stre->maketrack_fp != NULL) {
  3399.               int i;
  3400.               fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
  3401.               for(i=0;i<back_max;i++) {
  3402.                 back_info(back,i,3,stre->maketrack_fp);
  3403.               }
  3404.               fprintf(stre->maketrack_fp,LF);
  3405.               
  3406.             }
  3407.             makestat_time=l;
  3408.           }
  3409.         }
  3410. #if HTS_ANALYSTE
  3411.         {
  3412.           int i;
  3413.           {
  3414.             char* s=hts_cancel_file("");
  3415.             if (strnotempty(s)) {    // a file has to be cancelled
  3416.               for(i=0;i<back_max;i++) {
  3417.                 if ((back[i].status>0)) {
  3418.                   if (strcmp(back[i].url_sav,s)==0) {  // ok, found
  3419.                     if (back[i].status != 1000) {
  3420. #if HTS_DEBUG_CLOSESOCK
  3421.                       DEBUG_W("user cancel: deletehttp\n");
  3422. #endif
  3423.                       if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
  3424.                       back[i].r.soc=INVALID_SOCKET;
  3425.                       back[i].r.statuscode=-1;
  3426.                       strcpybuff(back[i].r.msg,"Cancelled by User");
  3427.                       back[i].status=0;  // finished
  3428.                     } else    // cancel ftp.. set the flag to 1
  3429.                       back[i].stop_ftp = 1;
  3430.                   }
  3431.                 }
  3432.               }
  3433.               s[0]='\0';
  3434.             }
  3435.           }
  3436.           
  3437.           // Transfer rate
  3438.           engine_stats();
  3439.           
  3440.           // Refresh various stats
  3441.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3442.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3443.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3444.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3445.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3446.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3447.           
  3448.           if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3449.             if (opt->errlog) {
  3450.               fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3451.               test_flush;
  3452.             } 
  3453.             *stre->exit_xh_=1;  // exit requested
  3454.             XH_uninit;
  3455.             return 0;
  3456.           }
  3457.         }
  3458.         
  3459. #endif
  3460. #if HTS_POLL
  3461.         if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
  3462.           TStamp tl;
  3463.           *stre->info_shell_=1;
  3464.           
  3465.           /* Toggle with ENTER */
  3466.           if (!opt->quiet) {
  3467.             if (check_stdin()) {
  3468.               char com[256];
  3469.               linput(stdin,com,200);
  3470.               if (opt->verbosedisplay==2)
  3471.                 opt->verbosedisplay=1;
  3472.               else
  3473.                 opt->verbosedisplay=2;
  3474.               /* Info for wrappers */
  3475.               if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3476.                 fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
  3477.               }
  3478. #if HTS_ANALYSTE
  3479.               hts_htmlcheck_chopt(opt);
  3480. #endif
  3481.             }
  3482.           }
  3483.           
  3484.           tl=time_local();
  3485.           
  3486.           // generate an info message about the current state
  3487.           if (opt->shell) {    // if shell
  3488.             if ((tl-*stre->last_info_shell_)>0) {    // every 1 sec
  3489.               FILE* fp=stdout;
  3490.               int a=0;
  3491.               *stre->last_info_shell_=tl;
  3492.               if (fexist(fconcat(opt->path_log,"hts-autopsy"))) {  // debugging: test whether the robot is alive
  3493.                 // (yes I know, a living robot.. but well.. it has the right to live too)
  3494.                 // (free the slave robots of the internet!)
  3495.                 remove(fconcat(opt->path_log,"hts-autopsy"));
                        // NOTE(review): the fopen() result below is passed to fprintf() further down without a NULL check -- confirm
  3496.                 fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
  3497.                 a=1;
  3498.               }
  3499.               if ((*stre->info_shell_) || a) {
  3500.                 int i,j;
  3501.                 
  3502.                 fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
  3503.                 fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
                        // NOTE(review): the divisor (tl - stat_timestart) has no visible guard against 0 here -- confirm
  3504.                 fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
  3505.                 fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
  3506.                 fprintf(fp,"LINK %d"LF,lien_tot);
  3507.                 {
  3508.                   LLint mem=0;
  3509.                   for(i=0;i<back_max;i++)
  3510.                     if (back[i].r.adr!=NULL)
  3511.                       mem+=back[i].r.size;
  3512.                     fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
  3513.                 }
  3514.                 for(j=0;j<2;j++) {  // passes for ready and wait
  3515.                   for(i=0;i<back_max;i++) {
  3516.                     back_info(back,i,j+1,stdout);    // maketrack_fp instead of stdout ?? // ** NOTE(review): writes to stdout even when fp is the hts-isalive file
  3517.                   }
  3518.                 }
  3519.                 fprintf(fp,LF);
  3520.                 if (a)
  3521.                   fclose(fp);
  3522.                 io_flush;
  3523.               }
  3524.             }
  3525.           }  // if shell
  3526.           
  3527.         }  // if shell or keyboard (option)
  3528.         //
  3529. #endif
  3530.             } while((b>=0) && (back[max(b,0)].status>0));
  3531.             
  3532.             
  3533.             // If link not found on the stack, it's because it has already been downloaded
  3534.             // in background
  3535.             // Then, skip it and go to the next one
  3536.             if (b<0) {
  3537.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3538.                 fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
  3539.                 test_flush;
  3540.               }
  3541.               
  3542.               // next link
  3543.               // ptr++;
  3544.               
  3545.               return 2; // goto jump_if_done;
  3546.             }
  3547.             
  3548.             
  3549. #if HTS_ANALYSTE==2
  3550. #else
  3551.             //if (!opt->quiet) {  // small animation
  3552.             if (!opt->verbosedisplay) {
  3553.               if (!opt->quiet) {
  3554.                 static int roll=0;  /* static: ok */
  3555.                 roll=(roll+1)%4;
  3556.                 printf("%c\x0d",("/-\\|")[roll]);
  3557.                 fflush(stdout);
  3558.               }
  3559.             } else if (opt->verbosedisplay==1) {
  3560.               if (back[b].r.statuscode==200)
  3561.                 printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
  3562.               else
  3563.                 printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
  3564.               fflush(stdout);
  3565.             }
  3566.             //}
  3567. #endif
  3568.             // ------------------------------------------------------------
  3569.             // Integrity checker
  3570. #if DEBUG_CHECKINT
  3571.             _CHECKINT(&back[b],"Retour de back_wait, aprΦs le while")
  3572.             {
  3573.               int i;
  3574.               for(i=0;i<back_max;i++) {
  3575.                 char si[256];
  3576.                 sprintf(si,"Test global aprΦs back_wait, index %d",i);
  3577.                 _CHECKINT(&back[i],si)
  3578.               }
  3579.             }
  3580. #endif
  3581.             
  3582.             // copy the htsblk response structure
  3583.             memcpy(r, &(back[b].r), sizeof(htsblk));
  3584.             r->location=stre->loc_;    // do NOT copy location!! it is an address, not a buffer
  3585.             if (back[b].r.location) 
  3586.               strcpybuff(r->location,back[b].r.location);
  3587.             back[b].r.adr=NULL;    // so that it is not freed afterwards
  3588.             
  3589.             // release the backing slot
  3590.             back_maydelete(back,opt,b);
  3591.             
  3592.             // progress display (disabled)
  3593. #if 0
  3594.             if (opt->aff_progress) {
  3595.               TStamp tl=time_local();
  3596.               if ((tl-HTS_STAT.stat_timestart)>0) {
  3597.                 char s[32];
  3598.                 int i=0;
  3599.                 lastime=tl;
  3600.                 _CLRSCR; _GOTOXY("1","1");
  3601.                 printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
  3602.                 while(i<minimum(back_max,99)) {  // **
  3603.                   if (back[i].status>=0) {  // loading..
  3604.                     s[0]='\0';
  3605.                     if (strlen(back[i].url_fil)>16)
  3606.                       strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);       
  3607.                     else
  3608.                       strncatbuff(s,back[i].url_fil,16);
  3609.                     printf("%s : ",s);
  3610.                     
  3611.                     printf("[");
  3612.                     if (back[i].r.totalsize>0) {
  3613.                       int p;
  3614.                       int j;
  3615.                       p=(int)((back[i].r.size*10)/back[i].r.totalsize);
  3616.                       p=minimum(10,p);
  3617.                       for(j=0;j<p;j++) printf("*");
  3618.                       for(j=0;j<(10-p);j++) printf("-");
  3619.                     } else { 
  3620.                       printf(LLintP,(LLint)back[i].r.size);                      
  3621.                     }
  3622.                     printf("]");
  3623.                     
  3624.                     //} else if (back[i].status==0) {
  3625.                     //  strcpybuff(s,"ENDED");
  3626.                   } 
  3627.                   printf("\n");
  3628.                   i++;
  3629.                 }
  3630.                 io_flush;
  3631.               }
  3632.             }
  3633. #endif
  3634.             
  3635.             // graphical debug
  3636. #if BDEBUG==2
  3637.             {
  3638.               char s[12];
  3639.               int i=0;
  3640.               _GOTOXY(1,1);
  3641.               printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
  3642.               while(i<minimum(back_max,160)) {
  3643.                 if (back[i].status>0) {
  3644.                   sprintf(s,"%d",back[i].r.size);
  3645.                 } else if (back[i].status==0) {
  3646.                   strcpybuff(s,"ENDED");
  3647.                 } else 
  3648.                   strcpybuff(s,"   -   ");
  3649.                 while(strlen(s)<8) strcatbuff(s," ");
  3650.                 printf("%s",s); io_flush;
  3651.                 i++;
  3652.               }
  3653.             }
  3654. #endif
  3655.             
  3656.             
  3657. #if BDEBUG==1
  3658.             printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
  3659. #endif
  3660.             
  3661.           }
  3662.           /*else {
  3663.           #if BDEBUG==1
  3664.           printf("back index error\n");
  3665.           #endif
  3666.           }
  3667.           */
  3668.           
  3669.           
  3670.           
  3671.           ENGINE_SAVE_CONTEXT();
  3672.  
  3673.           return 0;
  3674.           
  3675.           
  3676. }
  3677.  
  3678.  
  3679.